fix(image): Cap Uploaded File Image Count (#10298)

fix(scim): add advisory lock to prevent seat limit race condition (#10048) to release v3.1 (#10067)
feat(federated): full thread replies + direct URL fetch in Slack search (#9940) to release v3.1 (#10051)
2026-04-17 07:26:45 +00:00 · 2026-04-16 21:34:18 -07:00 · 2026-04-10 12:43:08 -07:00 · 2026-04-09 18:24:08 -07:00 · 2026-04-09 13:58:30 -07:00 · 2026-04-08 10:55:58 -07:00
231 changed files with 5822 additions and 12180 deletions
--- a/.github/workflows/helm-chart-releases.yml
+++ b/.github/workflows/helm-chart-releases.yml
@@ -47,8 +47,7 @@ jobs:
          done

      - name: Publish Helm charts to gh-pages
-        # NOTE: HEAD of https://github.com/stefanprodan/helm-gh-pages/pull/43
-        uses: stefanprodan/helm-gh-pages@ad32ad3b8720abfeaac83532fd1e9bdfca5bbe27 # zizmor: ignore[impostor-commit]
+        uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: deployment/helm/charts
--- a/.github/workflows/nightly-llm-provider-chat.yml
+++ b/.github/workflows/nightly-llm-provider-chat.yml
@@ -35,7 +35,6 @@ jobs:
    needs: [provider-chat-test]
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 5
    steps:
      - name: Checkout
--- a/.github/workflows/post-merge-beta-cherry-pick.yml
+++ b/.github/workflows/post-merge-beta-cherry-pick.yml
@@ -183,7 +183,6 @@ jobs:
      - cherry-pick-to-latest-release
    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
@@ -233,7 +232,6 @@ jobs:
      - cherry-pick-to-latest-release
    if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
--- a/.github/workflows/pr-desktop-build.yml
+++ b/.github/workflows/pr-desktop-build.yml
@@ -63,7 +63,7 @@ jobs:
          targets: ${{ matrix.target }}

      - name: Cache Cargo registry and build
-        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # zizmor: ignore[cache-poisoning]
+        uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
        with:
          path: |
            ~/.cargo/bin/
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -284,7 +284,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
@@ -626,7 +626,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -56,7 +56,7 @@ jobs:

      - name: Cache mypy cache
        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
-        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -31,7 +31,6 @@ jobs:
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-model-check"
      - "extras=ecr-cache"
-    environment: ci-protected
    timeout-minutes: 45

    env:
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -15,7 +15,6 @@ permissions:
 jobs:
  Deploy-Preview:
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
--- a/.github/workflows/release-cli.yml
+++ b/.github/workflows/release-cli.yml
@@ -13,6 +13,15 @@ jobs:
    permissions:
      id-token: write
    timeout-minutes: 10
+    strategy:
+      matrix:
+        os-arch:
+          - { goos: "linux", goarch: "amd64" }
+          - { goos: "linux", goarch: "arm64" }
+          - { goos: "windows", goarch: "amd64" }
+          - { goos: "windows", goarch: "arm64" }
+          - { goos: "darwin", goarch: "amd64" }
+          - { goos: "darwin", goarch: "arm64" }
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
@@ -22,11 +31,9 @@ jobs:
          enable-cache: false
          version: "0.9.9"
      - run: |
-          for goos in linux windows darwin; do
-            for goarch in amd64 arm64; do
-              GOOS="$goos" GOARCH="$goarch" uv build --wheel
-            done
-          done
+          GOOS="${{ matrix.os-arch.goos }}" \
+          GOARCH="${{ matrix.os-arch.goarch }}" \
+          uv build --wheel
        working-directory: cli
      - run: uv publish
        working-directory: cli
--- a/.github/workflows/storybook-deploy.yml
+++ b/.github/workflows/storybook-deploy.yml
@@ -25,7 +25,6 @@ permissions:
 jobs:
  Deploy-Storybook:
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
@@ -55,7 +54,6 @@ jobs:
    needs: Deploy-Storybook
    if: always() && needs.Deploy-Storybook.result == 'failure'
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
--- a/.github/workflows/sync_foss.yml
+++ b/.github/workflows/sync_foss.yml
@@ -9,7 +9,6 @@ on:
 jobs:
  sync-foss:
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 45
    permissions:
      contents: read
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -11,7 +11,6 @@ permissions:
 jobs:
  create-and-push-tag:
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 45

    steps:
--- a/.greptile/rules.md
+++ b/.greptile/rules.md
@@ -24,16 +24,6 @@ When hardcoding a boolean variable to a constant value, remove the variable enti

 Code changes must consider both multi-tenant and single-tenant deployments. In multi-tenant mode, preserve tenant isolation, ensure tenant context is propagated correctly, and avoid assumptions that only hold for a single shared schema or globally shared state. In single-tenant mode, avoid introducing unnecessary tenant-specific requirements or cloud-only control-plane dependencies.

-## Nginx Routing — New Backend Routes
-
-Whenever a new backend route is added that does NOT start with `/api`, it must also be explicitly added to ALL nginx configs:
- `deployment/helm/charts/onyx/templates/nginx-conf.yaml` (Helm/k8s)
- `deployment/data/nginx/app.conf.template` (docker-compose dev)
- `deployment/data/nginx/app.conf.template.prod` (docker-compose prod)
- `deployment/data/nginx/app.conf.template.no-letsencrypt` (docker-compose no-letsencrypt)
-
-Routes not starting with `/api` are not caught by the existing `^/(api|openapi\.json)` location block and will fall through to `location /`, which proxies to the Next.js web server and returns an HTML 404. The new location block must be placed before the `/api` block. Examples of routes that need this treatment: `/scim`, `/mcp`.
-
 ## Full vs Lite Deployments

 Code changes must consider both regular Onyx deployments and Onyx lite deployments. Lite deployments disable the vector DB, Redis, model servers, and background workers by default, use PostgreSQL-backed cache/auth/file storage, and rely on the API server to handle background work. Do not assume those services are available unless the code path is explicitly limited to full deployments.
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -122,7 +122,7 @@ repos:
    rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1
    hooks:
      - id: golangci-lint
-        language_version: "1.26.1"
+        language_version: "1.26.0"
        entry: bash -c "find . -name go.mod -not -path './.venv/*' -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"

  - repo: https://github.com/astral-sh/ruff-pre-commit
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep
 > [!TIP]
 > Run Onyx with one command (or see deployment section below):
 > ```
-> curl -fsSL https://onyx.app/install_onyx.sh | bash
+> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
 > ```

 ****
--- a/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
@@ -474,8 +474,6 @@ def connector_permission_sync_generator_task(
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
-                eager_load_connector=True,
-                eager_load_credential=True,
            )
            if cc_pair is None:
                raise ValueError(
--- a/backend/ee/onyx/external_permissions/slack/doc_sync.py
+++ b/backend/ee/onyx/external_permissions/slack/doc_sync.py
@@ -8,7 +8,6 @@ from ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map
 from onyx.access.models import DocExternalAccess
 from onyx.access.models import ExternalAccess
 from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
-from onyx.connectors.interfaces import SecondsSinceUnixEpoch
 from onyx.connectors.models import HierarchyNode
 from onyx.connectors.slack.connector import get_channels
 from onyx.connectors.slack.connector import make_paginated_slack_api_call
@@ -106,11 +105,9 @@ def _get_slack_document_access(
    slack_connector: SlackConnector,
    channel_permissions: dict[str, ExternalAccess],  # noqa: ARG001
    callback: IndexingHeartbeatInterface | None,
-    indexing_start: SecondsSinceUnixEpoch | None = None,
 ) -> Generator[DocExternalAccess, None, None]:
    slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
-        callback=callback,
-        start=indexing_start,
+        callback=callback
    )

    for doc_metadata_batch in slim_doc_generator:
@@ -183,15 +180,9 @@ def slack_doc_sync(

    slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
    slack_connector.set_credentials_provider(provider)
-    indexing_start_ts: SecondsSinceUnixEpoch | None = (
-        cc_pair.connector.indexing_start.timestamp()
-        if cc_pair.connector.indexing_start is not None
-        else None
-    )

    yield from _get_slack_document_access(
-        slack_connector=slack_connector,
+        slack_connector,
        channel_permissions=channel_permissions,
        callback=callback,
-        indexing_start=indexing_start_ts,
    )
--- a/backend/ee/onyx/external_permissions/utils.py
+++ b/backend/ee/onyx/external_permissions/utils.py
@@ -6,7 +6,6 @@ from onyx.access.models import ElementExternalAccess
 from onyx.access.models import ExternalAccess
 from onyx.access.models import NodeExternalAccess
 from onyx.configs.constants import DocumentSource
-from onyx.connectors.interfaces import SecondsSinceUnixEpoch
 from onyx.connectors.interfaces import SlimConnectorWithPermSync
 from onyx.connectors.models import HierarchyNode
 from onyx.db.models import ConnectorCredentialPair
@@ -41,19 +40,10 @@ def generic_doc_sync(

    logger.info(f"Starting {doc_source} doc sync for CC Pair ID: {cc_pair.id}")

-    indexing_start: SecondsSinceUnixEpoch | None = (
-        cc_pair.connector.indexing_start.timestamp()
-        if cc_pair.connector.indexing_start is not None
-        else None
-    )
-
    newly_fetched_doc_ids: set[str] = set()

    logger.info(f"Fetching all slim documents from {doc_source}")
-    for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(
-        start=indexing_start,
-        callback=callback,
-    ):
+    for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(callback=callback):
        logger.info(f"Got {len(doc_batch)} slim documents from {doc_source}")

        if callback:
--- a/backend/ee/onyx/server/scim/api.py
+++ b/backend/ee/onyx/server/scim/api.py
@@ -11,6 +11,8 @@ require a valid SCIM bearer token.

 from __future__ import annotations

+import hashlib
+import struct
 from uuid import UUID

 from fastapi import APIRouter
@@ -22,6 +24,7 @@ from fastapi import Response
 from fastapi.responses import JSONResponse
 from fastapi_users.password import PasswordHelper
 from sqlalchemy import func
+from sqlalchemy import text
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session

@@ -59,9 +62,25 @@ from onyx.db.models import UserGroup
 from onyx.db.models import UserRole
 from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
+from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()

+# Group names reserved for system default groups (seeded by migration).
+_RESERVED_GROUP_NAMES = frozenset({"Admin", "Basic"})
+
+# Namespace prefix for the seat-allocation advisory lock. Hashed together
+# with the tenant ID so the lock is scoped per-tenant (unrelated tenants
+# never block each other) and cannot collide with unrelated advisory locks.
+_SEAT_LOCK_NAMESPACE = "onyx_scim_seat_lock"
+
+
+def _seat_lock_id_for_tenant(tenant_id: str) -> int:
+    """Return a signed 64-bit advisory-lock id from SHA-256 of namespace + tenant_id."""
+    digest = hashlib.sha256(f"{_SEAT_LOCK_NAMESPACE}:{tenant_id}".encode()).digest()
+    # pg_advisory_xact_lock takes a signed 8-byte int; "<q" pins byte order (bare "q" is host-endian).
+    return struct.unpack("<q", digest[:8])[0]
+

 class ScimJSONResponse(JSONResponse):
    """JSONResponse with Content-Type: application/scim+json (RFC 7644 §3.1)."""
@@ -200,12 +219,37 @@ def _apply_exclusions(


 def _check_seat_availability(dal: ScimDAL) -> str | None:
-    """Return an error message if seat limit is reached, else None."""
+    """Return an error message if seat limit is reached, else None.
+
+    Acquires a transaction-scoped advisory lock so that concurrent
+    SCIM requests are serialized.  IdPs like Okta send provisioning
+    requests in parallel batches — without serialization the check is
+    vulnerable to a TOCTOU race where N concurrent requests each see
+    "seats available", all insert, and the tenant ends up over its
+    seat limit.
+
+    The lock is held until the caller's next COMMIT or ROLLBACK, which
+    means the seat count cannot change between the check here and the
+    subsequent INSERT/UPDATE.  Each call site in this module follows
+    the pattern: _check_seat_availability → write → dal.commit()
+    (which releases the lock for the next waiting request).
+    """
    check_fn = fetch_ee_implementation_or_noop(
        "onyx.db.license", "check_seat_availability", None
    )
    if check_fn is None:
        return None
+
+    # Transaction-scoped advisory lock — released on dal.commit() / dal.rollback().
+    # The lock id is derived from the tenant so unrelated tenants never block
+    # each other, and from a namespace string so it cannot collide with
+    # unrelated advisory locks elsewhere in the codebase.
+    lock_id = _seat_lock_id_for_tenant(get_current_tenant_id())
+    dal.session.execute(
+        text("SELECT pg_advisory_xact_lock(:lock_id)"),
+        {"lock_id": lock_id},
+    )
+
    result = check_fn(dal.session, seats_needed=1)
    if not result.available:
        return result.error_message or "Seat limit reached"
--- a/backend/onyx/chat/chat_state.py
+++ b/backend/onyx/chat/chat_state.py
@@ -1,8 +1,19 @@
 import threading
+import time
+from collections.abc import Callable
+from collections.abc import Generator
+from queue import Empty

 from onyx.chat.citation_processor import CitationMapping
+from onyx.chat.emitter import Emitter
 from onyx.context.search.models import SearchDoc
+from onyx.server.query_and_chat.placement import Placement
+from onyx.server.query_and_chat.streaming_models import OverallStop
+from onyx.server.query_and_chat.streaming_models import Packet
+from onyx.server.query_and_chat.streaming_models import PacketException
 from onyx.tools.models import ToolCallInfo
+from onyx.utils.threadpool_concurrency import run_in_background
+from onyx.utils.threadpool_concurrency import wait_on_background

 # Type alias for search doc deduplication key
 # Simple key: just document_id (str)
@@ -148,3 +159,114 @@ class ChatStateContainer:
        """Thread-safe getter for emitted citations (returns a copy)."""
        with self._lock:
            return self._emitted_citations.copy()
+
+
+def run_chat_loop_with_state_containers(
+    chat_loop_func: Callable[[Emitter, ChatStateContainer], None],
+    completion_callback: Callable[[ChatStateContainer], None],
+    is_connected: Callable[[], bool],
+    emitter: Emitter,
+    state_container: ChatStateContainer,
+) -> Generator[Packet, None]:
+    """
+    Run ``chat_loop_func`` in a background thread and stream the packets it emits.
+
+    The wrapped function receives ``emitter`` and ``state_container`` and emits
+    Packet objects through the emitter. This wrapper reads them from
+    ``emitter.bus`` and checks ``is_connected`` roughly every 300ms.
+
+    Args:
+        chat_loop_func: The function to run (called with emitter and state_container)
+        completion_callback: Called with state_container once streaming has ended
+        is_connected: Callable that returns False when stop signal is set
+        emitter: Emitter instance for sending packets
+        state_container: ChatStateContainer instance for accumulating state
+
+    Yields:
+        Packets read from the emitter's bus, ending with an OverallStop packet.
+
+    Raises:
+        Exception: the exception carried by a PacketException read from the bus.
+
+    Usage:
+        for packet in run_chat_loop_with_state_containers(
+            my_func, completion_callback, check_func, emitter, state_container
+        ):
+            ...  # Process packets
+    """
+
+    def run_with_exception_capture() -> None:
+        try:
+            chat_loop_func(emitter, state_container)
+        except Exception as e:
+            # If execution fails, emit an exception packet for the loop below to re-raise
+            emitter.emit(
+                Packet(
+                    placement=Placement(turn_index=0),
+                    obj=PacketException(type="error", exception=e),
+                )
+            )
+
+    # Run the function in a background thread
+    thread = run_in_background(run_with_exception_capture)
+
+    pkt: Packet | None = None
+    last_turn_index = 0  # Track the highest turn_index seen for stop packet
+    last_cancel_check = time.monotonic()
+    cancel_check_interval = 0.3  # Check for cancellation every 300ms
+    try:
+        while True:
+            # Poll queue with 300ms timeout for natural stop signal checking
+            # the 300ms timeout is to avoid busy-waiting and to allow the stop signal to be checked regularly
+            try:
+                pkt = emitter.bus.get(timeout=0.3)
+            except Empty:
+                if not is_connected():
+                    # Stop signal detected
+                    yield Packet(
+                        placement=Placement(turn_index=last_turn_index + 1),
+                        obj=OverallStop(type="stop", stop_reason="user_cancelled"),
+                    )
+                    break
+                last_cancel_check = time.monotonic()
+                continue
+
+            if pkt is not None:
+                # Track the highest turn_index for the stop packet
+                if pkt.placement and pkt.placement.turn_index > last_turn_index:
+                    last_turn_index = pkt.placement.turn_index
+
+                if isinstance(pkt.obj, OverallStop):
+                    yield pkt
+                    break
+                elif isinstance(pkt.obj, PacketException):
+                    raise pkt.obj.exception
+                else:
+                    yield pkt
+
+                # Check for cancellation periodically even when packets are flowing
+                # This ensures stop signal is checked during active streaming
+                current_time = time.monotonic()
+                if current_time - last_cancel_check >= cancel_check_interval:
+                    if not is_connected():
+                        # Stop signal detected during streaming
+                        yield Packet(
+                            placement=Placement(turn_index=last_turn_index + 1),
+                            obj=OverallStop(type="stop", stop_reason="user_cancelled"),
+                        )
+                        break
+                    last_cancel_check = current_time
+    finally:
+        # Wait for thread to complete on normal exit to propagate exceptions and ensure cleanup.
+        # Skip waiting if user disconnected to exit quickly.
+        if is_connected():
+            wait_on_background(thread)
+        try:
+            completion_callback(state_container)
+        except Exception as e:
+            emitter.emit(
+                Packet(
+                    placement=Placement(turn_index=last_turn_index + 1),
+                    obj=PacketException(type="error", exception=e),
+                )
+            )
--- a/backend/onyx/chat/emitter.py
+++ b/backend/onyx/chat/emitter.py
@@ -1,40 +1,19 @@
-import threading
 from queue import Queue

-from onyx.server.query_and_chat.placement import Placement
 from onyx.server.query_and_chat.streaming_models import Packet


 class Emitter:
-    """Routes packets from LLM/tool execution to the ``_run_models`` drain loop.
+    """Use this inside tools to emit arbitrary UI progress."""

-    Tags every packet with ``model_index`` and places it on ``merged_queue``
-    as a ``(model_idx, packet)`` tuple for ordered consumption downstream.
-
-    Args:
-        merged_queue: Shared queue owned by ``_run_models``.
-        model_idx: Index embedded in packet placements (``0`` for N=1 runs).
-        drain_done: Optional event set by ``_run_models`` when the drain loop
-            exits early (e.g. HTTP disconnect). When set, ``emit`` returns
-            immediately so worker threads can exit fast.
-    """
-
-    def __init__(
-        self,
-        merged_queue: Queue[tuple[int, Packet | Exception | object]],
-        model_idx: int = 0,
-        drain_done: threading.Event | None = None,
-    ) -> None:
-        self._model_idx = model_idx
-        self._merged_queue = merged_queue
-        self._drain_done = drain_done
+    def __init__(self, bus: Queue):
+        self.bus = bus

    def emit(self, packet: Packet) -> None:
-        if self._drain_done is not None and self._drain_done.is_set():
-            return
-        base = packet.placement or Placement(turn_index=0)
-        tagged = Packet(
-            placement=base.model_copy(update={"model_index": self._model_idx}),
-            obj=packet.obj,
-        )
-        self._merged_queue.put((self._model_idx, tagged))
+        self.bus.put(packet)  # Thread-safe
+
+
+def get_default_emitter() -> Emitter:
+    """Create an Emitter backed by a new, empty packet queue."""
+    packet_queue: Queue[Packet] = Queue()
+    return Emitter(packet_queue)
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -44,31 +44,6 @@ SEND_USER_METADATA_TO_LLM_PROVIDER = (
 # User Facing Features Configs
 #####
 BLURB_SIZE = 128  # Number Encoder Tokens included in the chunk blurb
-
-# Hard ceiling for the admin-configurable file upload size (in MB).
-# Self-hosted customers can raise or lower this via the environment variable.
-_raw_max_upload_size_mb = int(os.environ.get("MAX_ALLOWED_UPLOAD_SIZE_MB", "250"))
-if _raw_max_upload_size_mb < 0:
-    logger.warning(
-        "MAX_ALLOWED_UPLOAD_SIZE_MB=%d is negative; falling back to 250",
-        _raw_max_upload_size_mb,
-    )
-    _raw_max_upload_size_mb = 250
-MAX_ALLOWED_UPLOAD_SIZE_MB = _raw_max_upload_size_mb
-
-# Default fallback for the per-user file upload size limit (in MB) when no
-# admin-configured value exists.  Clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at
-# runtime so this never silently exceeds the hard ceiling.
-_raw_default_upload_size_mb = int(
-    os.environ.get("DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", "100")
-)
-if _raw_default_upload_size_mb < 0:
-    logger.warning(
-        "DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=%d is negative; falling back to 100",
-        _raw_default_upload_size_mb,
-    )
-    _raw_default_upload_size_mb = 100
-DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB = _raw_default_upload_size_mb
 GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
    os.environ.get("GENERATIVE_MODEL_ACCESS_CHECK_FREQ") or 86400
 )  # 1 day
@@ -86,6 +61,17 @@ CACHE_BACKEND = CacheBackendType(
    os.environ.get("CACHE_BACKEND", CacheBackendType.REDIS)
 )

+# Maximum token count for a single uploaded file. Files exceeding this are rejected.
+# Defaults to 100k tokens (or 10M when vector DB is disabled).
+_DEFAULT_FILE_TOKEN_LIMIT = 10_000_000 if DISABLE_VECTOR_DB else 100_000
+FILE_TOKEN_COUNT_THRESHOLD = int(
+    os.environ.get("FILE_TOKEN_COUNT_THRESHOLD", str(_DEFAULT_FILE_TOKEN_LIMIT))
+)
+
+# Maximum upload size for a single user file (chat/projects) in MB.
+USER_FILE_MAX_UPLOAD_SIZE_MB = int(os.environ.get("USER_FILE_MAX_UPLOAD_SIZE_MB") or 50)
+USER_FILE_MAX_UPLOAD_SIZE_BYTES = USER_FILE_MAX_UPLOAD_SIZE_MB * 1024 * 1024
+
 # If set to true, will show extra/uncommon connectors in the "Other" category
 SHOW_EXTRA_CONNECTORS = os.environ.get("SHOW_EXTRA_CONNECTORS", "").lower() == "true"

@@ -805,10 +791,6 @@ MINI_CHUNK_SIZE = 150
 # This is the number of regular chunks per large chunk
 LARGE_CHUNK_RATIO = 4

-# The maximum number of chunks that can be held for 1 document processing batch
-# The purpose of this is to set an upper bound on memory usage
-MAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get("MAX_CHUNKS_PER_DOC_BATCH") or 1000)
-
 # Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out
 # We don't want the metadata to overwhelm the actual contents of the chunk
 SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"
@@ -834,6 +816,29 @@ MAX_FILE_SIZE_BYTES = int(
    os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
 )  # 2GB in bytes

+# Maximum embedded images allowed in a single file. PDFs (and other formats)
+# with thousands of embedded images can OOM the user-file-processing worker
+# because every image is decoded with PIL and then sent to the vision LLM.
+# Enforced both at upload time (rejects the file) and during extraction
+# (defense-in-depth: caps the number of images materialized).
+#
+# Clamped to >= 0; a negative env value would turn upload validation into
+# always-fail and extraction into always-stop, which is never desired. 0
+# disables image extraction entirely, which is a valid (if aggressive) setting.
+MAX_EMBEDDED_IMAGES_PER_FILE = max(
+    0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500)
+)
+
+# Maximum embedded images allowed across all files in a single upload batch.
+# Protects against the scenario where a user uploads many files that each
+# fall under MAX_EMBEDDED_IMAGES_PER_FILE but aggregate to enough work
+# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
+# worker under concurrency or run up surprise latency/cost. Also clamped
+# to >= 0.
+MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
+    0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000)
+)
+
 # Use document summary for contextual rag
 USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
 # Use chunk summary for contextual rag
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -12,6 +12,11 @@ SLACK_USER_TOKEN_PREFIX = "xoxp-"
 SLACK_BOT_TOKEN_PREFIX = "xoxb-"
 ONYX_EMAILABLE_LOGO_MAX_DIM = 512

+# The mask_string() function in encryption.py uses "•" (U+2022 BULLET) to mask secrets.
+MASK_CREDENTIAL_CHAR = "\u2022"
+# Pattern produced by mask_string for strings >= 14 chars: "abcd...wxyz" (exactly 11 chars)
+MASK_CREDENTIAL_LONG_RE = re.compile(r"^.{4}\.{3}.{4}$")
+
 SOURCE_TYPE = "source_type"
 # stored in the `metadata` of a chunk. Used to signify that this chunk should
 # not be used for QA. For example, Google Drive file types which can't be parsed
--- a/backend/onyx/connectors/confluence/connector.py
+++ b/backend/onyx/connectors/confluence/connector.py
@@ -890,8 +890,8 @@ class ConfluenceConnector(

    def _retrieve_all_slim_docs(
        self,
-        start: SecondsSinceUnixEpoch | None = None,
-        end: SecondsSinceUnixEpoch | None = None,
+        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
+        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,
        include_permissions: bool = True,
    ) -> GenerateSlimDocumentOutput:
@@ -915,8 +915,8 @@ class ConfluenceConnector(
                self.confluence_client, doc_id, restrictions, ancestors
            ) or space_level_access_info.get(page_space_key)

-        # Query pages (with optional time filtering for indexing_start)
-        page_query = self._construct_page_cql_query(start, end)
+        # Query pages
+        page_query = self.base_cql_page_query + self.cql_label_filter
        for page in self.confluence_client.cql_paginate_all_expansions(
            cql=page_query,
            expand=restrictions_expand,
@@ -950,9 +950,7 @@ class ConfluenceConnector(

            # Query attachments for each page
            page_hierarchy_node_yielded = False
-            attachment_query = self._construct_attachment_query(
-                _get_page_id(page), start, end
-            )
+            attachment_query = self._construct_attachment_query(_get_page_id(page))
            for attachment in self.confluence_client.cql_paginate_all_expansions(
                cql=attachment_query,
                expand=restrictions_expand,
--- a/backend/onyx/connectors/sharepoint/connector.py
+++ b/backend/onyx/connectors/sharepoint/connector.py
@@ -1765,11 +1765,7 @@ class SharepointConnector(
        checkpoint.current_drive_delta_next_link = None
        checkpoint.seen_document_ids.clear()

-    def _fetch_slim_documents_from_sharepoint(
-        self,
-        start: datetime | None = None,
-        end: datetime | None = None,
-    ) -> GenerateSlimDocumentOutput:
+    def _fetch_slim_documents_from_sharepoint(self) -> GenerateSlimDocumentOutput:
        site_descriptors = self._filter_excluded_sites(
            self.site_descriptors or self.fetch_sites()
        )
@@ -1790,9 +1786,7 @@ class SharepointConnector(
            # Process site documents if flag is True
            if self.include_site_documents:
                for driveitem, drive_name, drive_web_url in self._fetch_driveitems(
-                    site_descriptor=site_descriptor,
-                    start=start,
-                    end=end,
+                    site_descriptor=site_descriptor
                ):
                    if self._is_driveitem_excluded(driveitem):
                        logger.debug(f"Excluding by path denylist: {driveitem.web_url}")
@@ -1847,9 +1841,7 @@ class SharepointConnector(

            # Process site pages if flag is True
            if self.include_site_pages:
-                site_pages = self._fetch_site_pages(
-                    site_descriptor, start=start, end=end
-                )
+                site_pages = self._fetch_site_pages(site_descriptor)
                for site_page in site_pages:
                    logger.debug(
                        f"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}"
@@ -2573,22 +2565,12 @@ class SharepointConnector(

    def retrieve_all_slim_docs_perm_sync(
        self,
-        start: SecondsSinceUnixEpoch | None = None,
-        end: SecondsSinceUnixEpoch | None = None,
+        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
+        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,  # noqa: ARG002
    ) -> GenerateSlimDocumentOutput:
-        start_dt = (
-            datetime.fromtimestamp(start, tz=timezone.utc)
-            if start is not None
-            else None
-        )
-        end_dt = (
-            datetime.fromtimestamp(end, tz=timezone.utc) if end is not None else None
-        )
-        yield from self._fetch_slim_documents_from_sharepoint(
-            start=start_dt,
-            end=end_dt,
-        )
+
+        yield from self._fetch_slim_documents_from_sharepoint()


 if __name__ == "__main__":
--- a/backend/onyx/connectors/slack/connector.py
+++ b/backend/onyx/connectors/slack/connector.py
@@ -516,8 +516,6 @@ def _get_all_doc_ids(
    ] = default_msg_filter,
    callback: IndexingHeartbeatInterface | None = None,
    workspace_url: str | None = None,
-    start: SecondsSinceUnixEpoch | None = None,
-    end: SecondsSinceUnixEpoch | None = None,
 ) -> GenerateSlimDocumentOutput:
    """
    Get all document ids in the workspace, channel by channel
@@ -548,8 +546,6 @@ def _get_all_doc_ids(
            client=client,
            channel=channel,
            callback=callback,
-            oldest=str(start) if start else None,  # 0.0 -> None intentionally
-            latest=str(end) if end is not None else None,
        )

        for message_batch in channel_message_batches:
@@ -851,8 +847,8 @@ class SlackConnector(

    def retrieve_all_slim_docs_perm_sync(
        self,
-        start: SecondsSinceUnixEpoch | None = None,
-        end: SecondsSinceUnixEpoch | None = None,
+        start: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
+        end: SecondsSinceUnixEpoch | None = None,  # noqa: ARG002
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        if self.client is None:
@@ -865,8 +861,6 @@ class SlackConnector(
            msg_filter_func=self.msg_filter_func,
            callback=callback,
            workspace_url=self._workspace_url,
-            start=start,
-            end=end,
        )

    def _load_from_checkpoint(
--- a/backend/onyx/context/search/federated/models.py
+++ b/backend/onyx/context/search/federated/models.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from datetime import datetime
 from typing import TypedDict

@@ -6,6 +7,14 @@ from pydantic import BaseModel
 from onyx.onyxbot.slack.models import ChannelType


+@dataclass(frozen=True)
+class DirectThreadFetch:
+    """Request to fetch a Slack thread directly by channel and timestamp."""
+
+    channel_id: str  # Slack channel ID, e.g. "C097NBWMY8Y"
+    thread_ts: str  # Slack ts string containing a dot, e.g. "1775491616.524769"
+
+
 class ChannelMetadata(TypedDict):
    """Type definition for cached channel metadata."""

--- a/backend/onyx/context/search/federated/slack_search.py
+++ b/backend/onyx/context/search/federated/slack_search.py
@@ -19,6 +19,7 @@ from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.connectors.models import IndexingDocument
 from onyx.connectors.models import TextSection
 from onyx.context.search.federated.models import ChannelMetadata
+from onyx.context.search.federated.models import DirectThreadFetch
 from onyx.context.search.federated.models import SlackMessage
 from onyx.context.search.federated.slack_search_utils import ALL_CHANNEL_TYPES
 from onyx.context.search.federated.slack_search_utils import build_channel_query_filter
@@ -49,7 +50,6 @@ from onyx.server.federated.models import FederatedConnectorDetail
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
 from onyx.utils.timing import log_function_time
-from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE

 logger = setup_logger()

@@ -58,7 +58,6 @@ HIGHLIGHT_END_CHAR = "\ue001"

 CHANNEL_METADATA_CACHE_TTL = 60 * 60 * 24  # 24 hours
 USER_PROFILE_CACHE_TTL = 60 * 60 * 24  # 24 hours
-SLACK_THREAD_CONTEXT_WINDOW = 3  # Number of messages before matched message to include
 CHANNEL_METADATA_MAX_RETRIES = 3  # Maximum retry attempts for channel metadata fetching
 CHANNEL_METADATA_RETRY_DELAY = 1  # Initial retry delay in seconds (exponential backoff)

@@ -421,6 +420,94 @@ class SlackQueryResult(BaseModel):
    filtered_channels: list[str]  # Channels filtered out during this query


+def _fetch_thread_from_url(
+    thread_fetch: DirectThreadFetch,
+    access_token: str,
+    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
+) -> SlackQueryResult:
+    """Fetch a thread directly from a Slack URL via conversations.replies.
+
+    The channel name is read from ``channel_metadata_dict`` when present there,
+    otherwise from conversations.info ("unknown" if that lookup fails).
+    Returns one SlackMessage whose text is the whole thread, or an empty
+    SlackQueryResult when the fetch fails or returns no messages.
+    """
+    channel_id = thread_fetch.channel_id
+    thread_ts = thread_fetch.thread_ts
+
+    slack_client = WebClient(token=access_token)
+    try:
+        response = slack_client.conversations_replies(channel=channel_id, ts=thread_ts)
+        response.validate()
+        messages: list[dict[str, Any]] = response.get("messages", [])
+    except SlackApiError as e:
+        logger.warning(
+            f"Failed to fetch thread from URL (channel={channel_id}, ts={thread_ts}): {e}"
+        )
+        return SlackQueryResult(messages=[], filtered_channels=[])
+
+    if not messages:
+        logger.warning(
+            f"No messages found for URL override (channel={channel_id}, ts={thread_ts})"
+        )
+        return SlackQueryResult(messages=[], filtered_channels=[])
+
+    # Build thread text from all messages
+    thread_text = _build_thread_text(messages, access_token, None, slack_client)
+
+    # Get channel name from metadata cache or API
+    channel_name = "unknown"
+    if channel_metadata_dict and channel_id in channel_metadata_dict:
+        channel_name = channel_metadata_dict[channel_id].get("name", "unknown")
+    else:
+        try:
+            ch_response = slack_client.conversations_info(channel=channel_id)
+            ch_response.validate()
+            channel_info: dict[str, Any] = ch_response.get("channel", {})
+            channel_name = channel_info.get("name", "unknown")
+        except SlackApiError as e:
+            # Best effort: keep "unknown", but record why the lookup failed
+            logger.warning(f"Could not resolve channel name for {channel_id}: {e}")
+
+    # Build the SlackMessage
+    parent_msg = messages[0]
+    message_ts = parent_msg.get("ts", thread_ts)
+    username = parent_msg.get("user", "unknown_user")
+    parent_text = parent_msg.get("text", "")
+    snippet = (
+        parent_text[:50].rstrip() + "..." if len(parent_text) > 50 else parent_text
+    ).replace("\n", " ")
+
+    doc_time = datetime.fromtimestamp(float(message_ts))
+    doc_age_years = (datetime.now() - doc_time).total_seconds() / (365 * 24 * 60 * 60)
+    recency_bias = max(1 / (1 + DOC_TIME_DECAY * doc_age_years), 0.75)
+
+    permalink = (
+        f"https://slack.com/archives/{channel_id}/p{message_ts.replace('.', '')}"
+    )
+
+    slack_message = SlackMessage(
+        document_id=f"{channel_id}_{message_ts}",
+        channel_id=channel_id,
+        message_id=message_ts,
+        thread_id=None,  # Prevent double-enrichment in thread context fetch
+        link=permalink,
+        metadata={"channel": channel_name, "time": doc_time.isoformat()},
+        timestamp=doc_time,
+        recency_bias=recency_bias,
+        semantic_identifier=f"{username} in #{channel_name}: {snippet}",
+        text=thread_text,
+        highlighted_texts=set(),
+        slack_score=100000.0,  # High priority — user explicitly asked for this thread
+    )
+
+    logger.info(
+        f"URL override: fetched thread from channel={channel_id}, ts={thread_ts}, {len(messages)} messages"
+    )
+
+    return SlackQueryResult(messages=[slack_message], filtered_channels=[])
+
+
 def query_slack(
    query_string: str,
    access_token: str,
@@ -432,7 +519,6 @@ def query_slack(
    available_channels: list[str] | None = None,
    channel_metadata_dict: dict[str, ChannelMetadata] | None = None,
 ) -> SlackQueryResult:
-
    # Check if query has channel override (user specified channels in query)
    has_channel_override = query_string.startswith("__CHANNEL_OVERRIDE__")

@@ -662,7 +748,6 @@ def _fetch_thread_context(
    """
    channel_id = message.channel_id
    thread_id = message.thread_id
-    message_id = message.message_id

    # If not a thread, return original text as success
    if thread_id is None:
@@ -695,62 +780,37 @@ def _fetch_thread_context(
    if len(messages) <= 1:
        return ThreadContextResult.success(message.text)

-    # Build thread text from thread starter + context window around matched message
-    thread_text = _build_thread_text(
-        messages, message_id, thread_id, access_token, team_id, slack_client
-    )
+    # Build thread text from thread starter + all replies
+    thread_text = _build_thread_text(messages, access_token, team_id, slack_client)
    return ThreadContextResult.success(thread_text)


 def _build_thread_text(
    messages: list[dict[str, Any]],
-    message_id: str,
-    thread_id: str,
    access_token: str,
    team_id: str | None,
    slack_client: WebClient,
 ) -> str:
-    """Build the thread text from messages."""
+    """Build thread text including all replies.
+
+    Includes the thread parent message followed by all replies in order.
+    """
    msg_text = messages[0].get("text", "")
    msg_sender = messages[0].get("user", "")
    thread_text = f"<@{msg_sender}>: {msg_text}"

+    # All messages after index 0 are replies
+    replies = messages[1:]
+    if not replies:
+        return thread_text
+
+    logger.debug(f"Thread {messages[0].get('ts')}: {len(replies)} replies included")
    thread_text += "\n\nReplies:"
-    if thread_id == message_id:
-        message_id_idx = 0
-    else:
-        message_id_idx = next(
-            (i for i, msg in enumerate(messages) if msg.get("ts") == message_id), 0
-        )
-        if not message_id_idx:
-            return thread_text

-        start_idx = max(1, message_id_idx - SLACK_THREAD_CONTEXT_WINDOW)
-
-        if start_idx > 1:
-            thread_text += "\n..."
-
-        for i in range(start_idx, message_id_idx):
-            msg_text = messages[i].get("text", "")
-            msg_sender = messages[i].get("user", "")
-            thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
-
-        msg_text = messages[message_id_idx].get("text", "")
-        msg_sender = messages[message_id_idx].get("user", "")
-        thread_text += f"\n\n<@{msg_sender}>: {msg_text}"
-
-    # Add following replies
-    len_replies = 0
-    for msg in messages[message_id_idx + 1 :]:
+    for msg in replies:
        msg_text = msg.get("text", "")
        msg_sender = msg.get("user", "")
-        reply = f"\n\n<@{msg_sender}>: {msg_text}"
-        thread_text += reply
-
-        len_replies += len(reply)
-        if len_replies >= DOC_EMBEDDING_CONTEXT_SIZE * 4:
-            thread_text += "\n..."
-            break
+        thread_text += f"\n\n<@{msg_sender}>: {msg_text}"

    # Replace user IDs with names using cached lookups
    userids: set[str] = set(re.findall(r"<@([A-Z0-9]+)>", thread_text))
@@ -976,7 +1036,16 @@ def slack_retrieval(

    # Query slack with entity filtering
    llm = get_default_llm()
-    query_strings = build_slack_queries(query, llm, entities, available_channels)
+    query_items = build_slack_queries(query, llm, entities, available_channels)
+
+    # Partition into direct thread fetches and search query strings
+    direct_fetches: list[DirectThreadFetch] = []
+    query_strings: list[str] = []
+    for item in query_items:
+        if isinstance(item, DirectThreadFetch):
+            direct_fetches.append(item)
+        else:
+            query_strings.append(item)

    # Determine filtering based on entities OR context (bot)
    include_dm = False
@@ -993,8 +1062,16 @@ def slack_retrieval(
                f"Private channel context: will only allow messages from {allowed_private_channel} + public channels"
            )

-    # Build search tasks
-    search_tasks = [
+    # Build search tasks — direct thread fetches + keyword searches
+    search_tasks: list[tuple] = [
+        (
+            _fetch_thread_from_url,
+            (fetch, access_token, channel_metadata_dict),
+        )
+        for fetch in direct_fetches
+    ]
+
+    search_tasks.extend(
        (
            query_slack,
            (
@@ -1010,7 +1087,7 @@ def slack_retrieval(
            ),
        )
        for query_string in query_strings
-    ]
+    )

    # If include_dm is True AND we're not already searching all channels,
    # add additional searches without channel filters.
--- a/backend/onyx/context/search/federated/slack_search_utils.py
+++ b/backend/onyx/context/search/federated/slack_search_utils.py
@@ -10,6 +10,7 @@ from pydantic import ValidationError

 from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
 from onyx.context.search.federated.models import ChannelMetadata
+from onyx.context.search.federated.models import DirectThreadFetch
 from onyx.context.search.models import ChunkIndexRequest
 from onyx.federated_connectors.slack.models import SlackEntities
 from onyx.llm.interfaces import LLM
@@ -638,12 +639,38 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
        return [query_text]


+SLACK_URL_PATTERN = re.compile(
+    r"https?://[a-z0-9-]+\.slack\.com/archives/([A-Z0-9]+)/p(\d{16})"
+)
+# Reply permalinks append the parent message ts as a ``thread_ts`` query param.
+_SLACK_THREAD_TS_PARAM = re.compile(r"\?(?:[^\s#]*&)?thread_ts=(\d{10}\.\d{6})")
+
+
+def extract_slack_message_urls(query_text: str) -> list[tuple[str, str]]:
+    """Extract Slack message URLs from query text.
+
+    Parses URLs like:
+      https://onyx-company.slack.com/archives/C097NBWMY8Y/p1775491616524769
+    Returns de-duplicated (channel_id, thread_ts) tuples in first-seen order.
+    A ``?thread_ts=`` query param (present on reply permalinks) wins, so the
+    parent thread is targeted; otherwise the 16-digit ``p`` value is converted
+    to Slack ts format (p1775491616524769 -> 1775491616.524769).
+    """
+    found: dict[tuple[str, str], None] = {}
+    for match in SLACK_URL_PATTERN.finditer(query_text):
+        raw_ts = match.group(2)
+        parent = _SLACK_THREAD_TS_PARAM.match(query_text, match.end())
+        thread_ts = parent.group(1) if parent else f"{raw_ts[:10]}.{raw_ts[10:]}"
+        found[(match.group(1), thread_ts)] = None
+    return list(found)
+
+
 def build_slack_queries(
    query: ChunkIndexRequest,
    llm: LLM,
    entities: dict[str, Any] | None = None,
    available_channels: list[str] | None = None,
-) -> list[str]:
+) -> list[str | DirectThreadFetch]:
    """Build Slack query strings with date filtering and query expansion."""
    default_search_days = 30
    if entities:
@@ -668,6 +695,15 @@ def build_slack_queries(
            cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
            time_filter = f" after:{cutoff_date.strftime('%Y-%m-%d')}"

+    # Check for Slack message URLs — if found, add direct fetch requests
+    url_fetches: list[DirectThreadFetch] = []
+    slack_urls = extract_slack_message_urls(query.query)
+    for channel_id, thread_ts in slack_urls:
+        url_fetches.append(
+            DirectThreadFetch(channel_id=channel_id, thread_ts=thread_ts)
+        )
+        logger.info(f"Detected Slack URL: channel={channel_id}, ts={thread_ts}")
+
    # ALWAYS extract channel references from the query (not just for recency queries)
    channel_references = extract_channel_references_from_query(query.query)

@@ -684,7 +720,9 @@ def build_slack_queries(

            # If valid channels detected, use ONLY those channels with NO keywords
            # Return query with ONLY time filter + channel filter (no keywords)
-            return [build_channel_override_query(channel_references, time_filter)]
+            return url_fetches + [
+                build_channel_override_query(channel_references, time_filter)
+            ]
        except ValueError as e:
            # If validation fails, log the error and continue with normal flow
            logger.warning(f"Channel reference validation failed: {e}")
@@ -702,7 +740,8 @@ def build_slack_queries(
        rephrased_queries = expand_query_with_llm(query.query, llm)

    # Build final query strings with time filters
-    return [
+    search_queries = [
        rephrased_query.strip() + time_filter
        for rephrased_query in rephrased_queries[:MAX_SLACK_QUERY_EXPANSIONS]
    ]
+    return url_fetches + search_queries
--- a/backend/onyx/db/api_key.py
+++ b/backend/onyx/db/api_key.py
@@ -4,7 +4,6 @@ from fastapi_users.password import PasswordHelper
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import joinedload
-from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

 from onyx.auth.api_key import ApiKeyDescriptor
@@ -55,7 +54,6 @@ async def fetch_user_for_api_key(
        select(User)
        .join(ApiKey, ApiKey.user_id == User.id)
        .where(ApiKey.hashed_api_key == hashed_api_key)
-        .options(selectinload(User.memories))
    )


--- a/backend/onyx/db/auth.py
+++ b/backend/onyx/db/auth.py
@@ -13,7 +13,6 @@ from sqlalchemy import func
 from sqlalchemy import Select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
-from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

 from onyx.auth.schemas import UserRole
@@ -98,11 +97,6 @@ async def get_user_count(only_admin_users: bool = False) -> int:

 # Need to override this because FastAPI Users doesn't give flexibility for backend field creation logic in OAuth flow
 class SQLAlchemyUserAdminDB(SQLAlchemyUserDatabase[UP, ID]):
-    async def _get_user(self, statement: Select) -> UP | None:
-        statement = statement.options(selectinload(User.memories))
-        results = await self.session.execute(statement)
-        return results.unique().scalar_one_or_none()
-
    async def create(
        self,
        create_dict: Dict[str, Any],
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -8,7 +8,6 @@ from uuid import UUID
 from fastapi import HTTPException
 from sqlalchemy import delete
 from sqlalchemy import desc
-from sqlalchemy import exists
 from sqlalchemy import func
 from sqlalchemy import nullsfirst
 from sqlalchemy import or_
@@ -132,32 +131,47 @@ def get_chat_sessions_by_user(
    if before is not None:
        stmt = stmt.where(ChatSession.time_updated < before)

-    if limit:
-        stmt = stmt.limit(limit)
-
    if project_id is not None:
        stmt = stmt.where(ChatSession.project_id == project_id)
    elif only_non_project_chats:
        stmt = stmt.where(ChatSession.project_id.is_(None))

-    if not include_failed_chats:
-        non_system_message_exists_subq = (
-            exists()
-            .where(ChatMessage.chat_session_id == ChatSession.id)
-            .where(ChatMessage.message_type != MessageType.SYSTEM)
-            .correlate(ChatSession)
-        )
-
-        # Leeway for newly created chats that don't have messages yet
-        time = datetime.now(timezone.utc) - timedelta(minutes=5)
-        recently_created = ChatSession.time_created >= time
-
-        stmt = stmt.where(or_(non_system_message_exists_subq, recently_created))
+    # When filtering out failed chats, we apply the limit in Python after
+    # filtering rather than in SQL, since the post-filter may remove rows.
+    if limit and include_failed_chats:
+        stmt = stmt.limit(limit)

    result = db_session.execute(stmt)
-    chat_sessions = result.scalars().all()
+    chat_sessions = list(result.scalars().all())

-    return list(chat_sessions)
+    if not include_failed_chats and chat_sessions:
+        # Filter out "failed" sessions (those with only SYSTEM messages)
+        # using a separate efficient query instead of a correlated EXISTS
+        # subquery, which causes full sequential scans of chat_message.
+        leeway = datetime.now(timezone.utc) - timedelta(minutes=5)
+        session_ids = [cs.id for cs in chat_sessions if cs.time_created < leeway]
+
+        if session_ids:
+            valid_session_ids_stmt = (
+                select(ChatMessage.chat_session_id)
+                .where(ChatMessage.chat_session_id.in_(session_ids))
+                .where(ChatMessage.message_type != MessageType.SYSTEM)
+                .distinct()
+            )
+            valid_session_ids = set(
+                db_session.execute(valid_session_ids_stmt).scalars().all()
+            )
+
+            chat_sessions = [
+                cs
+                for cs in chat_sessions
+                if cs.time_created >= leeway or cs.id in valid_session_ids
+            ]
+
+        if limit:
+            chat_sessions = chat_sessions[:limit]
+
+    return chat_sessions


 def delete_orphaned_search_docs(db_session: Session) -> None:
@@ -617,92 +631,6 @@ def reserve_message_id(
    return empty_message


-def reserve_multi_model_message_ids(
-    db_session: Session,
-    chat_session_id: UUID,
-    parent_message_id: int,
-    model_display_names: list[str],
-) -> list[ChatMessage]:
-    """Reserve N assistant message placeholders for multi-model parallel streaming.
-
-    All messages share the same parent (the user message). The parent's
-    latest_child_message_id points to the LAST reserved message so that the
-    default history-chain walker picks it up.
-    """
-    reserved: list[ChatMessage] = []
-    for display_name in model_display_names:
-        msg = ChatMessage(
-            chat_session_id=chat_session_id,
-            parent_message_id=parent_message_id,
-            latest_child_message_id=None,
-            message="Response was terminated prior to completion, try regenerating.",
-            token_count=15,  # placeholder; updated on completion by llm_loop_completion_handle
-            message_type=MessageType.ASSISTANT,
-            model_display_name=display_name,
-        )
-        db_session.add(msg)
-        reserved.append(msg)
-
-    # Flush to assign IDs without committing yet
-    db_session.flush()
-
-    # Point parent's latest_child to the last reserved message
-    parent = (
-        db_session.query(ChatMessage)
-        .filter(ChatMessage.id == parent_message_id)
-        .first()
-    )
-    if parent:
-        parent.latest_child_message_id = reserved[-1].id
-
-    db_session.commit()
-    return reserved
-
-
-def set_preferred_response(
-    db_session: Session,
-    user_message_id: int,
-    preferred_assistant_message_id: int,
-) -> None:
-    """Mark one assistant response as the user's preferred choice in a multi-model turn.
-
-    Also advances ``latest_child_message_id`` so the preferred response becomes
-    the active branch for any subsequent messages in the conversation.
-
-    Args:
-        db_session: Active database session.
-        user_message_id: Primary key of the ``USER``-type ``ChatMessage`` whose
-            preferred response is being set.
-        preferred_assistant_message_id: Primary key of the ``ASSISTANT``-type
-            ``ChatMessage`` to prefer. Must be a direct child of ``user_message_id``.
-
-    Raises:
-        ValueError: If either message is not found, if ``user_message_id`` does not
-            refer to a USER message, or if the assistant message is not a direct child
-            of the user message.
-    """
-    user_msg = db_session.get(ChatMessage, user_message_id)
-    if user_msg is None:
-        raise ValueError(f"User message {user_message_id} not found")
-    if user_msg.message_type != MessageType.USER:
-        raise ValueError(f"Message {user_message_id} is not a user message")
-
-    assistant_msg = db_session.get(ChatMessage, preferred_assistant_message_id)
-    if assistant_msg is None:
-        raise ValueError(
-            f"Assistant message {preferred_assistant_message_id} not found"
-        )
-    if assistant_msg.parent_message_id != user_message_id:
-        raise ValueError(
-            f"Assistant message {preferred_assistant_message_id} is not a child "
-            f"of user message {user_message_id}"
-        )
-
-    user_msg.preferred_response_id = preferred_assistant_message_id
-    user_msg.latest_child_message_id = preferred_assistant_message_id
-    db_session.commit()
-
-
 def create_new_chat_message(
    chat_session_id: UUID,
    parent_message: ChatMessage,
@@ -925,8 +853,6 @@ def translate_db_message_to_chat_message_detail(
        error=chat_message.error,
        current_feedback=current_feedback,
        processing_duration_seconds=chat_message.processing_duration_seconds,
-        preferred_response_id=chat_message.preferred_response_id,
-        model_display_name=chat_message.model_display_name,
    )

    return chat_msg_detail
--- a/backend/onyx/db/federated.py
+++ b/backend/onyx/db/federated.py
@@ -8,6 +8,8 @@ from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

 from onyx.configs.constants import FederatedConnectorSource
+from onyx.configs.constants import MASK_CREDENTIAL_CHAR
+from onyx.configs.constants import MASK_CREDENTIAL_LONG_RE
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.db.models import DocumentSet
 from onyx.db.models import FederatedConnector
@@ -45,6 +47,23 @@ def fetch_all_federated_connectors_parallel() -> list[FederatedConnector]:
        return fetch_all_federated_connectors(db_session)


+def _reject_masked_credentials(credentials: dict[str, Any]) -> None:
+    """Refuse credentials that still carry a masked placeholder value.
+
+    mask_string() has two output formats:
+    - Short strings (< 14 chars): "••••••••••••" (U+2022 BULLET)
+    - Long strings (>= 14 chars): "abcd...wxyz" (first4 + "..." + last4)
+    A string containing the mask char or matching the long form raises ValueError.
+    """
+    for key, value in credentials.items():
+        if not isinstance(value, str):
+            continue
+        if MASK_CREDENTIAL_CHAR in value or MASK_CREDENTIAL_LONG_RE.match(value):
+            raise ValueError(
+                f"Credential field '{key}' contains masked placeholder characters. Please provide the actual credential value."
+            )
+
+
 def validate_federated_connector_credentials(
    source: FederatedConnectorSource,
    credentials: dict[str, Any],
@@ -66,6 +85,8 @@ def create_federated_connector(
    config: dict[str, Any] | None = None,
 ) -> FederatedConnector:
    """Create a new federated connector with credential and config validation."""
+    _reject_masked_credentials(credentials)
+
    # Validate credentials before creating
    if not validate_federated_connector_credentials(source, credentials):
        raise ValueError(
@@ -277,6 +298,8 @@ def update_federated_connector(
    )

    if credentials is not None:
+        _reject_masked_credentials(credentials)
+
        # Validate credentials before updating
        if not validate_federated_connector_credentials(
            federated_connector.source, credentials
--- a/backend/onyx/db/pat.py
+++ b/backend/onyx/db/pat.py
@@ -8,7 +8,6 @@ from uuid import UUID
 from sqlalchemy import select
 from sqlalchemy import update
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

 from onyx.auth.pat import build_displayable_pat
@@ -47,7 +46,6 @@ async def fetch_user_for_pat(
            (PersonalAccessToken.expires_at.is_(None))
            | (PersonalAccessToken.expires_at > now)
        )
-        .options(selectinload(User.memories))
    )
    if not user:
        return None
--- a/backend/onyx/db/user_preferences.py
+++ b/backend/onyx/db/user_preferences.py
@@ -229,7 +229,9 @@ def get_memories_for_user(
    user_id: UUID,
    db_session: Session,
 ) -> Sequence[Memory]:
-    return db_session.scalars(select(Memory).where(Memory.user_id == user_id)).all()
+    return db_session.scalars(
+        select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.desc())
+    ).all()


 def update_user_pinned_assistants(
--- a/backend/onyx/document_index/disabled.py
+++ b/backend/onyx/document_index/disabled.py
@@ -5,7 +5,6 @@ accidentally reaches the vector DB layer will fail loudly instead of timing
 out against a nonexistent Vespa/OpenSearch instance.
 """

-from collections.abc import Iterable
 from typing import Any

 from onyx.context.search.models import IndexFilters
@@ -67,7 +66,7 @@ class DisabledDocumentIndex(DocumentIndex):
    # ------------------------------------------------------------------
    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],  # noqa: ARG002
+        chunks: list[DocMetadataAwareIndexChunk],  # noqa: ARG002
        index_batch_params: IndexBatchParams,  # noqa: ARG002
    ) -> set[DocumentInsertionRecord]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -1,5 +1,4 @@
 import abc
-from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Any
@@ -207,7 +206,7 @@ class Indexable(abc.ABC):
    @abc.abstractmethod
    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[DocumentInsertionRecord]:
        """
@@ -227,8 +226,8 @@ class Indexable(abc.ABC):
        it is done automatically outside of this code.

        Parameters:
-        - chunks: Document chunks with all of the information needed for
-                indexing to the document index.
+        - chunks: Document chunks with all of the information needed for indexing to the document
+                index.
        - tenant_id: The tenant id of the user whose chunks are being indexed
        - large_chunks_enabled: Whether large chunks are enabled

--- a/backend/onyx/document_index/interfaces_new.py
+++ b/backend/onyx/document_index/interfaces_new.py
@@ -1,5 +1,4 @@
 import abc
-from collections.abc import Iterable
 from typing import Self

 from pydantic import BaseModel
@@ -210,10 +209,10 @@ class Indexable(abc.ABC):
    @abc.abstractmethod
    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
-        """Indexes an iterable of document chunks into the document index.
+        """Indexes a list of document chunks into the document index.

        This is often a batch operation including chunks from multiple
        documents.
--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -1,12 +1,11 @@
 import json
-from collections.abc import Iterable
+from collections import defaultdict
 from typing import Any

 import httpx
 from opensearchpy import NotFoundError

 from onyx.access.models import DocumentAccess
-from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -352,7 +351,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
@@ -648,10 +647,10 @@ class OpenSearchDocumentIndex(DocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
-        indexing_metadata: IndexingMetadata,
+        chunks: list[DocMetadataAwareIndexChunk],
+        indexing_metadata: IndexingMetadata,  # noqa: ARG002
    ) -> list[DocumentInsertionRecord]:
-        """Indexes an iterable of document chunks into the document index.
+        """Indexes a list of document chunks into the document index.

        Groups chunks by document ID and for each document, deletes existing
        chunks and indexes the new chunks in bulk.
@@ -674,34 +673,29 @@ class OpenSearchDocumentIndex(DocumentIndex):
                document is newly indexed or had already existed and was just
                updated.
        """
-        total_chunks = sum(
-            cc.new_chunk_cnt
-            for cc in indexing_metadata.doc_id_to_chunk_cnt_diff.values()
+        # Group chunks by document ID.
+        doc_id_to_chunks: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(
+            list
        )
+        for chunk in chunks:
+            doc_id_to_chunks[chunk.source_document.id].append(chunk)
        logger.debug(
-            f"[OpenSearchDocumentIndex] Indexing {total_chunks} chunks from {len(indexing_metadata.doc_id_to_chunk_cnt_diff)} "
+            f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks from {len(doc_id_to_chunks)} "
            f"documents for index {self._index_name}."
        )

        document_indexing_results: list[DocumentInsertionRecord] = []
-        deleted_doc_ids: set[str] = set()
-        # Buffer chunks per document as they arrive from the iterable.
-        # When the document ID changes flush the buffered chunks.
-        current_doc_id: str | None = None
-        current_chunks: list[DocMetadataAwareIndexChunk] = []
-
-        def _flush_chunks(doc_chunks: list[DocMetadataAwareIndexChunk]) -> None:
-            assert len(doc_chunks) > 0, "doc_chunks is empty"
-
+        # Try to index per-document.
+        for _, chunks in doc_id_to_chunks.items():
            # Create a batch of OpenSearch-formatted chunks for bulk insertion.
-            # Since we are doing this in batches, an error occurring midway
-            # can result in a state where chunks are deleted and not all the
-            # new chunks have been indexed.
+            # Do this before deleting existing chunks to reduce the amount of
+            # time the document index has no content for a given document, and
+            # to reduce the chance of entering a state where we delete chunks,
+            # then some error happens, and never successfully index new chunks.
            chunk_batch: list[DocumentChunk] = [
-                _convert_onyx_chunk_to_opensearch_document(chunk)
-                for chunk in doc_chunks
+                _convert_onyx_chunk_to_opensearch_document(chunk) for chunk in chunks
            ]
-            onyx_document: Document = doc_chunks[0].source_document
+            onyx_document: Document = chunks[0].source_document
            # First delete the doc's chunks from the index. This is so that
            # there are no dangling chunks in the index, in the event that the
            # new document's content contains fewer chunks than the previous
@@ -710,43 +704,22 @@ class OpenSearchDocumentIndex(DocumentIndex):
            # if the chunk count has actually decreased. This assumes that
            # overlapping chunks are perfectly overwritten. If we can't
            # guarantee that then we need the code as-is.
-            if onyx_document.id not in deleted_doc_ids:
-                num_chunks_deleted = self.delete(
-                    onyx_document.id, onyx_document.chunk_count
-                )
-                deleted_doc_ids.add(onyx_document.id)
-                # If we see that chunks were deleted we assume the doc already
-                # existed. We record the result before bulk_index_documents
-                # runs. If indexing raises, this entire result list is discarded
-                # by the caller's retry logic, so early recording is safe.
-                document_indexing_results.append(
-                    DocumentInsertionRecord(
-                        document_id=onyx_document.id,
-                        already_existed=num_chunks_deleted > 0,
-                    )
-                )
+            num_chunks_deleted = self.delete(
+                onyx_document.id, onyx_document.chunk_count
+            )
+            # If we see that chunks were deleted we assume the doc already
+            # existed.
+            document_insertion_record = DocumentInsertionRecord(
+                document_id=onyx_document.id,
+                already_existed=num_chunks_deleted > 0,
+            )
            # Now index. This will raise if a chunk of the same ID exists, which
            # we do not expect because we should have deleted all chunks.
            self._client.bulk_index_documents(
                documents=chunk_batch,
                tenant_state=self._tenant_state,
            )
-
-        for chunk in chunks:
-            doc_id = chunk.source_document.id
-            if doc_id != current_doc_id:
-                if current_chunks:
-                    _flush_chunks(current_chunks)
-                current_doc_id = doc_id
-                current_chunks = [chunk]
-            elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:
-                _flush_chunks(current_chunks)
-                current_chunks = [chunk]
-            else:
-                current_chunks.append(chunk)
-
-        if current_chunks:
-            _flush_chunks(current_chunks)
+            document_indexing_results.append(document_insertion_record)

        return document_indexing_results

--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -6,7 +6,6 @@ import re
 import time
 import urllib
 import zipfile
-from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
 from datetime import timedelta
@@ -462,7 +461,7 @@ class VespaIndex(DocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
--- a/backend/onyx/document_index/vespa/vespa_document_index.py
+++ b/backend/onyx/document_index/vespa/vespa_document_index.py
@@ -1,8 +1,6 @@
 import concurrent.futures
 import logging
 import random
-from collections.abc import Generator
-from collections.abc import Iterable
 from typing import Any
 from uuid import UUID

@@ -10,7 +8,6 @@ import httpx
 from pydantic import BaseModel
 from retry import retry

-from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER
 from onyx.configs.app_configs import RERANK_COUNT
 from onyx.configs.chat_configs import DOC_TIME_DECAY
@@ -321,7 +318,7 @@ class VespaDocumentIndex(DocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
        doc_id_to_chunk_cnt_diff = indexing_metadata.doc_id_to_chunk_cnt_diff
@@ -341,31 +338,22 @@ class VespaDocumentIndex(DocumentIndex):

        # Vespa has restrictions on valid characters, yet document IDs come from
        # external w.r.t. this class. We need to sanitize them.
-        #
-        # Instead of materializing all cleaned chunks upfront, we stream them
-        # through a generator that cleans IDs and builds the original-ID mapping
-        # incrementally as chunks flow into Vespa.
-        def _clean_and_track(
-            chunks_iter: Iterable[DocMetadataAwareIndexChunk],
-            id_map: dict[str, str],
-            seen_ids: set[str],
-        ) -> Generator[DocMetadataAwareIndexChunk, None, None]:
-            """Cleans chunk IDs and builds the original-ID mapping
-            incrementally as chunks flow through, avoiding a separate
-            materialization pass."""
-            for chunk in chunks_iter:
-                original_id = chunk.source_document.id
-                cleaned = clean_chunk_id_copy(chunk)
-                cleaned_id = cleaned.source_document.id
-                # Needed so the final DocumentInsertionRecord returned can have
-                # the original document ID. cleaned_chunks might not contain IDs
-                # exactly as callers supplied them.
-                id_map[cleaned_id] = original_id
-                seen_ids.add(cleaned_id)
-                yield cleaned
+        cleaned_chunks: list[DocMetadataAwareIndexChunk] = [
+            clean_chunk_id_copy(chunk) for chunk in chunks
+        ]
+        assert len(cleaned_chunks) == len(
+            chunks
+        ), "Bug: Cleaned chunks and input chunks have different lengths."

-        new_document_id_to_original_document_id: dict[str, str] = {}
-        all_cleaned_doc_ids: set[str] = set()
+        # Needed so the final DocumentInsertionRecord returned can have the
+        # original document ID. cleaned_chunks might not contain IDs exactly as
+        # callers supplied them.
+        new_document_id_to_original_document_id: dict[str, str] = dict()
+        for i, cleaned_chunk in enumerate(cleaned_chunks):
+            old_chunk = chunks[i]
+            new_document_id_to_original_document_id[
+                cleaned_chunk.source_document.id
+            ] = old_chunk.source_document.id

        existing_docs: set[str] = set()

@@ -421,16 +409,8 @@ class VespaDocumentIndex(DocumentIndex):
                    executor=executor,
                )

-            # Insert new Vespa documents, streaming through the cleaning
-            # pipeline so chunks are never fully materialized.
-            cleaned_chunks = _clean_and_track(
-                chunks,
-                new_document_id_to_original_document_id,
-                all_cleaned_doc_ids,
-            )
-            for chunk_batch in batch_generator(
-                cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)
-            ):
+            # Insert new Vespa documents.
+            for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
                batch_index_vespa_chunks(
                    chunks=chunk_batch,
                    index_name=self._index_name,
@@ -439,6 +419,10 @@ class VespaDocumentIndex(DocumentIndex):
                    executor=executor,
                )

+        all_cleaned_doc_ids: set[str] = {
+            chunk.source_document.id for chunk in cleaned_chunks
+        }
+
        return [
            DocumentInsertionRecord(
                document_id=new_document_id_to_original_document_id[cleaned_doc_id],
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -21,6 +21,7 @@ import chardet
 import openpyxl
 from PIL import Image

+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
 from onyx.configs.constants import ONYX_METADATA_FILENAME
 from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
 from onyx.file_processing.file_types import OnyxFileExtensions
@@ -44,15 +45,26 @@ KNOWN_OPENPYXL_BUGS = [
    "Value must be either numerical or a string containing a wildcard",
    "File contains no valid workbook part",
    "Unable to read workbook: could not read stylesheet from None",
-    "Colors must be aRGB hex values",
 ]


 def get_markitdown_converter() -> "MarkItDown":
     global _MARKITDOWN_CONVERTER
-    from markitdown import MarkItDown

     if _MARKITDOWN_CONVERTER is None:
+        from markitdown import MarkItDown
+
+        # Monkey-patch PptxConverter._convert_chart_to_markdown (class-wide) to
+        # return a fixed placeholder instead of converting charts. Chart
+        # conversion was taking an inordinate amount of time, making some
+        # PowerPoint files with many or complicated charts nearly unindexable.
+        from markitdown.converters._pptx_converter import PptxConverter
+
+        setattr(
+            PptxConverter,
+            "_convert_chart_to_markdown",
+            lambda self, chart: "\n\n[chart omitted]\n\n",  # noqa: ARG005
+        )
         _MARKITDOWN_CONVERTER = MarkItDown(enable_plugins=False)
     return _MARKITDOWN_CONVERTER

@@ -177,6 +189,56 @@ def read_text_file(
    return file_content_raw, metadata


+def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
+    """Return the number of embedded images in a PDF, short-circuiting at cap+1.
+
+    Used to reject PDFs whose image count would OOM the user-file-processing
+    worker during indexing. Returns a value > cap as a sentinel once the count
+    exceeds the cap, so callers do not iterate thousands of image objects just
+    to report a number. Returns 0 if the PDF cannot be parsed.
+
+    Owner-password-only PDFs (permission restrictions but no open password) are
+    counted normally — they decrypt with an empty string. Truly password-locked
+    PDFs are skipped (return 0) since we can't inspect them; the caller should
+    ensure the password-protected check runs first.
+
+    Best-effort: the original file position is restored if tell()/seek() succeed.
+    """
+    from pypdf import PdfReader
+
+    try:
+        start_pos = file.tell()
+    except Exception:
+        start_pos = None
+    try:
+        if start_pos is not None:
+            file.seek(0)
+        reader = PdfReader(file)
+        if reader.is_encrypted:
+            # Try empty password first (owner-password-only PDFs); give up if that fails.
+            try:
+                if reader.decrypt("") == 0:
+                    return 0
+            except Exception:
+                return 0
+        count = 0
+        for page in reader.pages:
+            for _ in page.images:
+                count += 1
+                if count > cap:
+                    return count
+        return count
+    except Exception:
+        logger.warning("Failed to count embedded images in PDF", exc_info=True)
+        return 0
+    finally:
+        if start_pos is not None:
+            try:
+                file.seek(start_pos)
+            except Exception:
+                pass
+
+
 def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
    """
    Extract text from a PDF. For embedded images, a more complex approach is needed.
@@ -203,18 +265,26 @@ def read_pdf_file(
    try:
        pdf_reader = PdfReader(file)

-        if pdf_reader.is_encrypted and pdf_pass is not None:
+        if pdf_reader.is_encrypted:
+            # Try the explicit password first, then fall back to an empty
+            # string.  Owner-password-only PDFs (permission restrictions but
+            # no open password) decrypt successfully with "".
+            # See https://github.com/onyx-dot-app/onyx/issues/9754
+            passwords = [p for p in [pdf_pass, ""] if p is not None]
            decrypt_success = False
-            try:
-                decrypt_success = pdf_reader.decrypt(pdf_pass) != 0
-            except Exception:
-                logger.error("Unable to decrypt pdf")
+            for pw in passwords:
+                try:
+                    if pdf_reader.decrypt(pw) != 0:
+                        decrypt_success = True
+                        break
+                except Exception:
+                    pass

            if not decrypt_success:
+                logger.error(
+                    "Encrypted PDF could not be decrypted, returning empty text."
+                )
                return "", metadata, []
-        elif pdf_reader.is_encrypted:
-            logger.warning("No Password for an encrypted PDF, returning empty text.")
-            return "", metadata, []

        # Basic PDF metadata
        if pdf_reader.metadata is not None:
@@ -232,8 +302,27 @@ def read_pdf_file(
        )

        if extract_images:
+            image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+            images_processed = 0
+            cap_reached = False
            for page_num, page in enumerate(pdf_reader.pages):
+                if cap_reached:
+                    break
                for image_file_object in page.images:
+                    if images_processed >= image_cap:
+                        # Defense-in-depth backstop. Upload-time validation
+                        # should have rejected files exceeding the cap, but
+                        # we also break here so a single oversized file can
+                        # never pin a worker.
+                        logger.warning(
+                            "PDF embedded image cap reached (%d). "
+                            "Skipping remaining images on page %d and beyond.",
+                            image_cap,
+                            page_num + 1,
+                        )
+                        cap_reached = True
+                        break
+
                    image = Image.open(io.BytesIO(image_file_object.data))
                    img_byte_arr = io.BytesIO()
                    image.save(img_byte_arr, format=image.format)
@@ -246,6 +335,7 @@ def read_pdf_file(
                        image_callback(img_bytes, image_name)
                    else:
                        extracted_images.append((img_bytes, image_name))
+                    images_processed += 1

        return text, metadata, extracted_images

--- a/backend/onyx/file_processing/password_validation.py
+++ b/backend/onyx/file_processing/password_validation.py
@@ -33,8 +33,20 @@ def is_pdf_protected(file: IO[Any]) -> bool:

    with preserve_position(file):
        reader = PdfReader(file)
+        if not reader.is_encrypted:
+            return False

-    return bool(reader.is_encrypted)
+        # PDFs with only an owner password (permission restrictions like
+        # print/copy disabled) use an empty user password — any viewer can open
+        # them without prompting.  decrypt("") returns 0 only when a real user
+        # password is required.  See https://github.com/onyx-dot-app/onyx/issues/9754
+        try:
+            return reader.decrypt("") == 0
+        except Exception:
+            logger.exception(
+                "Failed to evaluate PDF encryption; treating as password protected"
+            )
+            return True


 def is_docx_protected(file: IO[Any]) -> bool:
--- a/backend/onyx/indexing/adapters/document_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/document_indexing_adapter.py
@@ -19,8 +19,7 @@ from onyx.db.document import update_docs_updated_at__no_commit
 from onyx.db.document_set import fetch_document_sets_for_documents
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
 from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
-from onyx.indexing.models import ChunkEnrichmentContext
-from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import BuildMetadataAwareChunksResult
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -86,21 +85,14 @@ class DocumentIndexingBatchAdapter:
        ) as transaction:
            yield transaction

-    def prepare_enrichment(
+    def build_metadata_aware_chunks(
        self,
-        context: DocumentBatchPrepareContext,
+        chunks_with_embeddings: list[IndexChunk],
+        chunk_content_scores: list[float],
        tenant_id: str,
-        chunks: list[DocAwareChunk],
-    ) -> "DocumentChunkEnricher":
-        """Do all DB lookups once and return a per-chunk enricher."""
-        updatable_ids = [doc.id for doc in context.updatable_docs]
-
-        doc_id_to_new_chunk_cnt: dict[str, int] = {
-            doc_id: 0 for doc_id in updatable_ids
-        }
-        for chunk in chunks:
-            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
-                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
+        context: DocumentBatchPrepareContext,
+    ) -> BuildMetadataAwareChunksResult:
+        """Enrich chunks with access, document sets, boosts, and ancestor hierarchy IDs."""

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -110,30 +102,67 @@ class DocumentIndexingBatchAdapter:
            is_public=False,
        )

-        return DocumentChunkEnricher(
-            doc_id_to_access_info=get_access_for_documents(
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+
+        doc_id_to_access_info = get_access_for_documents(
+            document_ids=updatable_ids, db_session=self.db_session
+        )
+        doc_id_to_document_set = {
+            document_id: document_sets
+            for document_id, document_sets in fetch_document_sets_for_documents(
                document_ids=updatable_ids, db_session=self.db_session
-            ),
-            doc_id_to_document_set={
-                document_id: document_sets
-                for document_id, document_sets in fetch_document_sets_for_documents(
-                    document_ids=updatable_ids, db_session=self.db_session
-                )
-            },
-            doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(
-                context.updatable_docs, tenant_id
-            ),
-            id_to_boost_map=context.id_to_boost_map,
-            doc_id_to_previous_chunk_cnt={
-                document_id: chunk_count
-                for document_id, chunk_count in fetch_chunk_counts_for_documents(
-                    document_ids=updatable_ids,
-                    db_session=self.db_session,
-                )
-            },
-            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
-            no_access=no_access,
-            tenant_id=tenant_id,
+            )
+        }
+
+        doc_id_to_previous_chunk_cnt: dict[str, int] = {
+            document_id: chunk_count
+            for document_id, chunk_count in fetch_chunk_counts_for_documents(
+                document_ids=updatable_ids,
+                db_session=self.db_session,
+            )
+        }
+
+        doc_id_to_new_chunk_cnt: dict[str, int] = {
+            doc_id: 0 for doc_id in updatable_ids
+        }
+        for chunk in chunks_with_embeddings:
+            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
+                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
+
+        # Get ancestor hierarchy node IDs for each document
+        doc_id_to_ancestor_ids = self._get_ancestor_ids_for_documents(
+            context.updatable_docs, tenant_id
+        )
+
+        access_aware_chunks = [
+            DocMetadataAwareIndexChunk.from_index_chunk(
+                index_chunk=chunk,
+                access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
+                document_sets=set(
+                    doc_id_to_document_set.get(chunk.source_document.id, [])
+                ),
+                user_project=[],
+                personas=[],
+                boost=(
+                    context.id_to_boost_map[chunk.source_document.id]
+                    if chunk.source_document.id in context.id_to_boost_map
+                    else DEFAULT_BOOST
+                ),
+                tenant_id=tenant_id,
+                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
+                ancestor_hierarchy_node_ids=doc_id_to_ancestor_ids[
+                    chunk.source_document.id
+                ],
+            )
+            for chunk_num, chunk in enumerate(chunks_with_embeddings)
+        ]
+
+        return BuildMetadataAwareChunksResult(
+            chunks=access_aware_chunks,
+            doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
+            doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
+            user_file_id_to_raw_text={},
+            user_file_id_to_token_count={},
        )

    def _get_ancestor_ids_for_documents(
@@ -174,7 +203,7 @@ class DocumentIndexingBatchAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        enrichment: ChunkEnrichmentContext,
+        result: BuildMetadataAwareChunksResult,
    ) -> None:
        """Finalize DB updates, store plaintext, and mark docs as indexed."""
        updatable_ids = [doc.id for doc in context.updatable_docs]
@@ -198,7 +227,7 @@ class DocumentIndexingBatchAdapter:

        update_docs_chunk_count__no_commit(
            document_ids=updatable_ids,
-            doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,
+            doc_id_to_chunk_count=result.doc_id_to_new_chunk_cnt,
            db_session=self.db_session,
        )

@@ -220,52 +249,3 @@ class DocumentIndexingBatchAdapter:
        )

        self.db_session.commit()
-
-
-class DocumentChunkEnricher:
-    """Pre-computed metadata for per-chunk enrichment of connector documents."""
-
-    def __init__(
-        self,
-        doc_id_to_access_info: dict[str, DocumentAccess],
-        doc_id_to_document_set: dict[str, list[str]],
-        doc_id_to_ancestor_ids: dict[str, list[int]],
-        id_to_boost_map: dict[str, int],
-        doc_id_to_previous_chunk_cnt: dict[str, int],
-        doc_id_to_new_chunk_cnt: dict[str, int],
-        no_access: DocumentAccess,
-        tenant_id: str,
-    ) -> None:
-        self._doc_id_to_access_info = doc_id_to_access_info
-        self._doc_id_to_document_set = doc_id_to_document_set
-        self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids
-        self._id_to_boost_map = id_to_boost_map
-        self._no_access = no_access
-        self._tenant_id = tenant_id
-        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
-        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
-
-    def enrich_chunk(
-        self, chunk: IndexChunk, score: float
-    ) -> DocMetadataAwareIndexChunk:
-        return DocMetadataAwareIndexChunk.from_index_chunk(
-            index_chunk=chunk,
-            access=self._doc_id_to_access_info.get(
-                chunk.source_document.id, self._no_access
-            ),
-            document_sets=set(
-                self._doc_id_to_document_set.get(chunk.source_document.id, [])
-            ),
-            user_project=[],
-            personas=[],
-            boost=(
-                self._id_to_boost_map[chunk.source_document.id]
-                if chunk.source_document.id in self._id_to_boost_map
-                else DEFAULT_BOOST
-            ),
-            tenant_id=self._tenant_id,
-            aggregated_chunk_boost_factor=score,
-            ancestor_hierarchy_node_ids=self._doc_id_to_ancestor_ids[
-                chunk.source_document.id
-            ],
-        )
--- a/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
@@ -1,9 +1,6 @@
-from __future__ import annotations
-
 import contextlib
 import datetime
 import time
-from collections import defaultdict
 from collections.abc import Generator
 from uuid import UUID

@@ -27,13 +24,11 @@ from onyx.db.user_file import fetch_persona_ids_for_user_files
 from onyx.db.user_file import fetch_user_project_ids_for_user_files
 from onyx.file_store.utils import store_user_file_plaintext
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
-from onyx.indexing.models import ChunkEnrichmentContext
-from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import BuildMetadataAwareChunksResult
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
 from onyx.llm.factory import get_default_llm
-from onyx.natural_language_processing.utils import count_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.utils.logger import setup_logger

@@ -106,20 +101,13 @@ class UserFileIndexingAdapter:
                f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}"
            )

-    def prepare_enrichment(
+    def build_metadata_aware_chunks(
        self,
-        context: DocumentBatchPrepareContext,
+        chunks_with_embeddings: list[IndexChunk],
+        chunk_content_scores: list[float],
        tenant_id: str,
-        chunks: list[DocAwareChunk],
-    ) -> UserFileChunkEnricher:
-        """Do all DB lookups and pre-compute file metadata from chunks."""
-        updatable_ids = [doc.id for doc in context.updatable_docs]
-
-        doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)
-        content_by_file: dict[str, list[str]] = defaultdict(list)
-        for chunk in chunks:
-            doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
-            content_by_file[chunk.source_document.id].append(chunk.content)
+        context: DocumentBatchPrepareContext,
+    ) -> BuildMetadataAwareChunksResult:

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -129,6 +117,7 @@ class UserFileIndexingAdapter:
            is_public=False,
        )

+        updatable_ids = [doc.id for doc in context.updatable_docs]
        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
@@ -149,6 +138,17 @@ class UserFileIndexingAdapter:
            )
        }

+        user_file_id_to_new_chunk_cnt: dict[str, int] = {
+            user_file_id: len(
+                [
+                    chunk
+                    for chunk in chunks_with_embeddings
+                    if chunk.source_document.id == user_file_id
+                ]
+            )
+            for user_file_id in updatable_ids
+        }
+
        # Initialize tokenizer used for token count calculation
        try:
            llm = get_default_llm()
@@ -163,30 +163,46 @@ class UserFileIndexingAdapter:
        user_file_id_to_raw_text: dict[str, str] = {}
        user_file_id_to_token_count: dict[str, int | None] = {}
        for user_file_id in updatable_ids:
-            contents = content_by_file.get(user_file_id)
-            if contents:
-                combined_content = " ".join(contents)
+            user_file_chunks = [
+                chunk
+                for chunk in chunks_with_embeddings
+                if chunk.source_document.id == user_file_id
+            ]
+            if user_file_chunks:
+                combined_content = " ".join(
+                    [chunk.content for chunk in user_file_chunks]
+                )
                user_file_id_to_raw_text[str(user_file_id)] = combined_content
-                token_count: int = (
-                    count_tokens(combined_content, llm_tokenizer)
-                    if llm_tokenizer
-                    else 0
+                token_count = (
+                    len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
                )
                user_file_id_to_token_count[str(user_file_id)] = token_count
            else:
                user_file_id_to_raw_text[str(user_file_id)] = ""
                user_file_id_to_token_count[str(user_file_id)] = None

-        return UserFileChunkEnricher(
-            user_file_id_to_access=user_file_id_to_access,
-            user_file_id_to_project_ids=user_file_id_to_project_ids,
-            user_file_id_to_persona_ids=user_file_id_to_persona_ids,
+        access_aware_chunks = [
+            DocMetadataAwareIndexChunk.from_index_chunk(
+                index_chunk=chunk,
+                access=user_file_id_to_access.get(chunk.source_document.id, no_access),
+                document_sets=set(),
+                user_project=user_file_id_to_project_ids.get(
+                    chunk.source_document.id, []
+                ),
+                personas=user_file_id_to_persona_ids.get(chunk.source_document.id, []),
+                boost=DEFAULT_BOOST,
+                tenant_id=tenant_id,
+                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
+            )
+            for chunk_num, chunk in enumerate(chunks_with_embeddings)
+        ]
+
+        return BuildMetadataAwareChunksResult(
+            chunks=access_aware_chunks,
            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
-            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
+            doc_id_to_new_chunk_cnt=user_file_id_to_new_chunk_cnt,
            user_file_id_to_raw_text=user_file_id_to_raw_text,
            user_file_id_to_token_count=user_file_id_to_token_count,
-            no_access=no_access,
-            tenant_id=tenant_id,
        )

    def _notify_assistant_owners_if_files_ready(
@@ -230,9 +246,8 @@ class UserFileIndexingAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],  # noqa: ARG002
        filtered_documents: list[Document],  # noqa: ARG002
-        enrichment: ChunkEnrichmentContext,
+        result: BuildMetadataAwareChunksResult,
    ) -> None:
-        assert isinstance(enrichment, UserFileChunkEnricher)
        user_file_ids = [doc.id for doc in context.updatable_docs]

        user_files = (
@@ -248,10 +263,8 @@ class UserFileIndexingAdapter:
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
-            user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(
-                str(user_file.id), 0
-            )
-            user_file.token_count = enrichment.user_file_id_to_token_count[
+            user_file.chunk_count = result.doc_id_to_new_chunk_cnt[str(user_file.id)]
+            user_file.token_count = result.user_file_id_to_token_count[
                str(user_file.id)
            ]

@@ -263,54 +276,8 @@ class UserFileIndexingAdapter:
        # Store the plaintext in the file store for faster retrieval
        # NOTE: this creates its own session to avoid committing the overall
        # transaction.
-        for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():
+        for user_file_id, raw_text in result.user_file_id_to_raw_text.items():
            store_user_file_plaintext(
                user_file_id=UUID(user_file_id),
                plaintext_content=raw_text,
            )
-
-
-class UserFileChunkEnricher:
-    """Pre-computed metadata for per-chunk enrichment of user-uploaded files."""
-
-    def __init__(
-        self,
-        user_file_id_to_access: dict[str, DocumentAccess],
-        user_file_id_to_project_ids: dict[str, list[int]],
-        user_file_id_to_persona_ids: dict[str, list[int]],
-        doc_id_to_previous_chunk_cnt: dict[str, int],
-        doc_id_to_new_chunk_cnt: dict[str, int],
-        user_file_id_to_raw_text: dict[str, str],
-        user_file_id_to_token_count: dict[str, int | None],
-        no_access: DocumentAccess,
-        tenant_id: str,
-    ) -> None:
-        self._user_file_id_to_access = user_file_id_to_access
-        self._user_file_id_to_project_ids = user_file_id_to_project_ids
-        self._user_file_id_to_persona_ids = user_file_id_to_persona_ids
-        self._no_access = no_access
-        self._tenant_id = tenant_id
-        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
-        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
-        self.user_file_id_to_raw_text = user_file_id_to_raw_text
-        self.user_file_id_to_token_count = user_file_id_to_token_count
-
-    def enrich_chunk(
-        self, chunk: IndexChunk, score: float
-    ) -> DocMetadataAwareIndexChunk:
-        return DocMetadataAwareIndexChunk.from_index_chunk(
-            index_chunk=chunk,
-            access=self._user_file_id_to_access.get(
-                chunk.source_document.id, self._no_access
-            ),
-            document_sets=set(),
-            user_project=self._user_file_id_to_project_ids.get(
-                chunk.source_document.id, []
-            ),
-            personas=self._user_file_id_to_persona_ids.get(
-                chunk.source_document.id, []
-            ),
-            boost=DEFAULT_BOOST,
-            tenant_id=self._tenant_id,
-            aggregated_chunk_boost_factor=score,
-        )
--- a/backend/onyx/indexing/chunk_batch_store.py
+++ b/backend/onyx/indexing/chunk_batch_store.py
@@ -1,89 +0,0 @@
-import pickle
-import shutil
-import tempfile
-from collections.abc import Iterator
-from pathlib import Path
-
-from onyx.indexing.models import IndexChunk
-
-
-class ChunkBatchStore:
-    """Manages serialization of embedded chunks to a temporary directory.
-
-    Owns the temp directory lifetime and provides save/load/stream/scrub
-    operations.
-
-    Use as a context manager to ensure cleanup::
-
-        with ChunkBatchStore() as store:
-            store.save(chunks, batch_idx=0)
-            for chunk in store.stream():
-                ...
-    """
-
-    _EXT = ".pkl"
-
-    def __init__(self) -> None:
-        self._tmpdir: Path | None = None
-
-    # -- context manager -----------------------------------------------------
-
-    def __enter__(self) -> "ChunkBatchStore":
-        self._tmpdir = Path(tempfile.mkdtemp(prefix="onyx_embeddings_"))
-        return self
-
-    def __exit__(self, *_exc: object) -> None:
-        if self._tmpdir is not None:
-            shutil.rmtree(self._tmpdir, ignore_errors=True)
-            self._tmpdir = None
-
-    @property
-    def _dir(self) -> Path:
-        assert self._tmpdir is not None, "ChunkBatchStore used outside context manager"
-        return self._tmpdir
-
-    # -- storage primitives --------------------------------------------------
-
-    def save(self, chunks: list[IndexChunk], batch_idx: int) -> None:
-        """Serialize a batch of embedded chunks to disk."""
-        with open(self._dir / f"batch_{batch_idx}{self._EXT}", "wb") as f:
-            pickle.dump(chunks, f)
-
-    def _load(self, batch_file: Path) -> list[IndexChunk]:
-        """Deserialize a batch of embedded chunks from a file."""
-        with open(batch_file, "rb") as f:
-            return pickle.load(f)
-
-    def _batch_files(self) -> list[Path]:
-        """Return batch files sorted by numeric index."""
-        return sorted(
-            self._dir.glob(f"batch_*{self._EXT}"),
-            key=lambda p: int(p.stem.removeprefix("batch_")),
-        )
-
-    # -- higher-level operations ---------------------------------------------
-
-    def stream(self) -> Iterator[IndexChunk]:
-        """Yield all chunks across all batch files.
-
-        Each call returns a fresh generator, so the data can be iterated
-        multiple times (e.g. once per document index).
-        """
-        for batch_file in self._batch_files():
-            yield from self._load(batch_file)
-
-    def scrub_failed_docs(self, failed_doc_ids: set[str]) -> None:
-        """Remove chunks belonging to *failed_doc_ids* from all batch files.
-
-        When a document fails embedding in batch N, earlier batches may
-        already contain successfully embedded chunks for that document.
-        This ensures the output is all-or-nothing per document.
-        """
-        for batch_file in self._batch_files():
-            batch_chunks = self._load(batch_file)
-            cleaned = [
-                c for c in batch_chunks if c.source_document.id not in failed_doc_ids
-            ]
-            if len(cleaned) != len(batch_chunks):
-                with open(batch_file, "wb") as f:
-                    pickle.dump(cleaned, f)
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -1,8 +1,5 @@
 from collections import defaultdict
 from collections.abc import Callable
-from collections.abc import Generator
-from collections.abc import Iterator
-from contextlib import contextmanager
 from typing import Protocol

 from pydantic import BaseModel
@@ -12,7 +9,6 @@ from sqlalchemy.orm import Session
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
 from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
-from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
 from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
 from onyx.configs.app_configs import USE_CHUNK_SUMMARY
@@ -47,12 +43,10 @@ from onyx.document_index.interfaces import DocumentMetadata
 from onyx.document_index.interfaces import IndexBatchParams
 from onyx.file_processing.image_summarization import summarize_image_with_error_handling
 from onyx.file_store.file_store import get_default_file_store
-from onyx.indexing.chunk_batch_store import ChunkBatchStore
 from onyx.indexing.chunker import Chunker
 from onyx.indexing.embedder import embed_chunks_with_failure_handling
 from onyx.indexing.embedder import IndexingEmbedder
 from onyx.indexing.models import DocAwareChunk
-from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexingBatchAdapter
 from onyx.indexing.models import UpdatableChunkData
 from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
@@ -69,7 +63,6 @@ from onyx.natural_language_processing.utils import tokenizer_trim_middle
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
 from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
-from onyx.utils.batching import batch_generator
 from onyx.utils.logger import setup_logger
 from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
@@ -98,20 +91,6 @@ class IndexingPipelineResult(BaseModel):

    failures: list[ConnectorFailure]

-    @classmethod
-    def empty(cls, total_docs: int) -> "IndexingPipelineResult":
-        return cls(
-            new_docs=0,
-            total_docs=total_docs,
-            total_chunks=0,
-            failures=[],
-        )
-
-
-class ChunkEmbeddingResult(BaseModel):
-    successful_chunk_ids: list[tuple[int, str]]  # (chunk_id, document_id)
-    connector_failures: list[ConnectorFailure]
-

 class IndexingPipelineProtocol(Protocol):
    def __call__(
@@ -160,110 +139,6 @@ def _upsert_documents_in_db(
        )


-def _get_failed_doc_ids(failures: list[ConnectorFailure]) -> set[str]:
-    """Extract document IDs from a list of connector failures."""
-    return {f.failed_document.document_id for f in failures if f.failed_document}
-
-
-def _embed_chunks_to_store(
-    chunks: list[DocAwareChunk],
-    embedder: IndexingEmbedder,
-    tenant_id: str,
-    request_id: str | None,
-    store: ChunkBatchStore,
-) -> ChunkEmbeddingResult:
-    """Embed chunks in batches, spilling each batch to *store*.
-
-    If a document fails embedding in any batch, its chunks are excluded from
-    all batches (including earlier ones already written) so that the output
-    is all-or-nothing per document.
-    """
-    successful_chunk_ids: list[tuple[int, str]] = []
-    all_embedding_failures: list[ConnectorFailure] = []
-    # Track failed doc IDs across all batches so that a failure in batch N
-    # causes chunks for that doc to be skipped in batch N+1 and stripped
-    # from earlier batches.
-    all_failed_doc_ids: set[str] = set()
-
-    for batch_idx, chunk_batch in enumerate(
-        batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)
-    ):
-        # Skip chunks belonging to documents that failed in earlier batches.
-        chunk_batch = [
-            c for c in chunk_batch if c.source_document.id not in all_failed_doc_ids
-        ]
-        if not chunk_batch:
-            continue
-
-        logger.debug(f"Embedding batch {batch_idx}: {len(chunk_batch)} chunks")
-
-        chunks_with_embeddings, embedding_failures = embed_chunks_with_failure_handling(
-            chunks=chunk_batch,
-            embedder=embedder,
-            tenant_id=tenant_id,
-            request_id=request_id,
-        )
-        all_embedding_failures.extend(embedding_failures)
-        all_failed_doc_ids.update(_get_failed_doc_ids(embedding_failures))
-
-        # Only keep successfully embedded chunks for non-failed docs.
-        chunks_with_embeddings = [
-            c
-            for c in chunks_with_embeddings
-            if c.source_document.id not in all_failed_doc_ids
-        ]
-
-        successful_chunk_ids.extend(
-            (c.chunk_id, c.source_document.id) for c in chunks_with_embeddings
-        )
-
-        store.save(chunks_with_embeddings, batch_idx)
-        del chunks_with_embeddings
-
-    # Scrub earlier batches for docs that failed in later batches.
-    if all_failed_doc_ids:
-        store.scrub_failed_docs(all_failed_doc_ids)
-        successful_chunk_ids = [
-            (chunk_id, doc_id)
-            for chunk_id, doc_id in successful_chunk_ids
-            if doc_id not in all_failed_doc_ids
-        ]
-
-    return ChunkEmbeddingResult(
-        successful_chunk_ids=successful_chunk_ids,
-        connector_failures=all_embedding_failures,
-    )
-
-
-@contextmanager
-def embed_and_stream(
-    chunks: list[DocAwareChunk],
-    embedder: IndexingEmbedder,
-    tenant_id: str,
-    request_id: str | None,
-) -> Generator[tuple[ChunkEmbeddingResult, ChunkBatchStore], None, None]:
-    """Embed chunks to disk and yield a ``(result, store)`` pair.
-
-    The store owns the temp directory — files are cleaned up when the context
-    manager exits.
-
-    Usage::
-
-        with embed_and_stream(chunks, embedder, tenant_id, req_id) as (result, store):
-            for chunk in store.stream():
-                ...
-    """
-    with ChunkBatchStore() as store:
-        result = _embed_chunks_to_store(
-            chunks=chunks,
-            embedder=embedder,
-            tenant_id=tenant_id,
-            request_id=request_id,
-            store=store,
-        )
-        yield result, store
-
-
 def get_doc_ids_to_update(
    documents: list[Document], db_docs: list[DBDocument]
 ) -> list[Document]:
@@ -762,29 +637,6 @@ def add_contextual_summaries(
    return chunks


-def _verify_indexing_completeness(
-    insertion_records: list[DocumentInsertionRecord],
-    write_failures: list[ConnectorFailure],
-    embedding_failed_doc_ids: set[str],
-    updatable_ids: list[str],
-    document_index_name: str,
-) -> None:
-    """Verify that every updatable document was either indexed or reported as failed."""
-    all_returned_doc_ids = (
-        {r.document_id for r in insertion_records}
-        | {f.failed_document.document_id for f in write_failures if f.failed_document}
-        | embedding_failed_doc_ids
-    )
-    if all_returned_doc_ids != set(updatable_ids):
-        raise RuntimeError(
-            f"Some documents were not successfully indexed. "
-            f"Updatable IDs: {updatable_ids}, "
-            f"Returned IDs: {all_returned_doc_ids}. "
-            f"This should never happen. "
-            f"This occured for document index {document_index_name}"
-        )
-
-
@log_function_time(debug_only=True)
 def index_doc_batch(
    *,
@@ -820,7 +672,12 @@ def index_doc_batch(
    filtered_documents = filter_fnc(document_batch)
    context = adapter.prepare(filtered_documents, ignore_time_skip)
    if not context:
-        return IndexingPipelineResult.empty(len(filtered_documents))
+        return IndexingPipelineResult(
+            new_docs=0,
+            total_docs=len(filtered_documents),
+            total_chunks=0,
+            failures=[],
+        )

    # Convert documents to IndexingDocument objects with processed section
    # logger.debug("Processing image sections")
@@ -859,99 +716,117 @@ def index_doc_batch(
        )

    logger.debug("Starting embedding")
-    with embed_and_stream(chunks, embedder, tenant_id, request_id) as (
-        embedding_result,
-        chunk_store,
-    ):
-        updatable_ids = [doc.id for doc in context.updatable_docs]
-        updatable_chunk_data = [
-            UpdatableChunkData(
-                chunk_id=chunk_id,
-                document_id=document_id,
-                boost_score=1.0,
-            )
-            for chunk_id, document_id in embedding_result.successful_chunk_ids
-        ]
+    chunks_with_embeddings, embedding_failures = (
+        embed_chunks_with_failure_handling(
+            chunks=chunks,
+            embedder=embedder,
+            tenant_id=tenant_id,
+            request_id=request_id,
+        )
+        if chunks
+        else ([], [])
+    )

-        embedding_failed_doc_ids = _get_failed_doc_ids(
-            embedding_result.connector_failures
+    chunk_content_scores = [1.0] * len(chunks_with_embeddings)
+
+    updatable_ids = [doc.id for doc in context.updatable_docs]
+    updatable_chunk_data = [
+        UpdatableChunkData(
+            chunk_id=chunk.chunk_id,
+            document_id=chunk.source_document.id,
+            boost_score=score,
+        )
+        for chunk, score in zip(chunks_with_embeddings, chunk_content_scores)
+    ]
+
+    # Acquires a lock on the documents so that no other process can modify them
+    # NOTE: don't need to acquire till here, since this is when the actual race condition
+    # with Vespa can occur.
+    with adapter.lock_context(context.updatable_docs):
+        # we're concerned about race conditions where multiple simultaneous indexings might result
+        # in one set of metadata overwriting another one in vespa.
+        # we still write data here for the immediate and most likely correct sync, but
+        # to resolve this, an update of the last modified field at the end of this loop
+        # always triggers a final metadata sync via the celery queue
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=chunks_with_embeddings,
+            chunk_content_scores=chunk_content_scores,
+            tenant_id=tenant_id,
+            context=context,
        )

-        # Filter to only successfully embedded chunks so
-        # doc_id_to_new_chunk_cnt reflects what's actually written to Vespa.
-        embedded_chunks = [
-            c for c in chunks if c.source_document.id not in embedding_failed_doc_ids
-        ]
+        short_descriptor_list = [chunk.to_short_descriptor() for chunk in result.chunks]
+        short_descriptor_log = str(short_descriptor_list)[:1024]
+        logger.debug(f"Indexing the following chunks: {short_descriptor_log}")

-        # Acquires a lock on the documents so that no other process can modify
-        # them.  Not needed until here, since this is when the actual race
-        # condition with vector db can occur.
-        with adapter.lock_context(context.updatable_docs):
-            enricher = adapter.prepare_enrichment(
-                context=context,
-                tenant_id=tenant_id,
-                chunks=embedded_chunks,
+        primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
+        primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
+        for document_index in document_indices:
+            # A document is never split across batches, so every document
+            # that has chunks in this set is fully represented by the
+            # chunks in this set.
+            (
+                insertion_records,
+                vector_db_write_failures,
+            ) = write_chunks_to_vector_db_with_backoff(
+                document_index=document_index,
+                chunks=result.chunks,
+                index_batch_params=IndexBatchParams(
+                    doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
+                    doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
+                    tenant_id=tenant_id,
+                    large_chunks_enabled=chunker.enable_large_chunks,
+                ),
            )

-            index_batch_params = IndexBatchParams(
-                doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,
-                doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,
-                tenant_id=tenant_id,
-                large_chunks_enabled=chunker.enable_large_chunks,
-            )
-
-            primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = (
-                None
-            )
-            primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = (
-                None
-            )
-
-            for document_index in document_indices:
-
-                def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:
-                    for chunk in chunk_store.stream():
-                        yield enricher.enrich_chunk(chunk, 1.0)
-
-                insertion_records, write_failures = (
-                    write_chunks_to_vector_db_with_backoff(
-                        document_index=document_index,
-                        make_chunks=_enriched_stream,
-                        index_batch_params=index_batch_params,
-                    )
+            all_returned_doc_ids: set[str] = (
+                {record.document_id for record in insertion_records}
+                .union(
+                    {
+                        record.failed_document.document_id
+                        for record in vector_db_write_failures
+                        if record.failed_document
+                    }
                )
-
-                _verify_indexing_completeness(
-                    insertion_records=insertion_records,
-                    write_failures=write_failures,
-                    embedding_failed_doc_ids=embedding_failed_doc_ids,
-                    updatable_ids=updatable_ids,
-                    document_index_name=document_index.__class__.__name__,
+                .union(
+                    {
+                        record.failed_document.document_id
+                        for record in embedding_failures
+                        if record.failed_document
+                    }
                )
-                # We treat the first document index we got as the primary one used
-                # for reporting the state of indexing.
-                if primary_doc_idx_insertion_records is None:
-                    primary_doc_idx_insertion_records = insertion_records
-                if primary_doc_idx_vector_db_write_failures is None:
-                    primary_doc_idx_vector_db_write_failures = write_failures
-
-            adapter.post_index(
-                context=context,
-                updatable_chunk_data=updatable_chunk_data,
-                filtered_documents=filtered_documents,
-                enrichment=enricher,
            )
+            if all_returned_doc_ids != set(updatable_ids):  # IDs must match exactly
+                raise RuntimeError(
+                    f"Some documents were not successfully indexed. "
+                    f"Updatable IDs: {updatable_ids}, "
+                    f"Returned IDs: {all_returned_doc_ids}. "
+                    f"This should never happen. "
+                    f"This occured for document index {document_index.__class__.__name__}"
+                )
+            # We treat the first document index we got as the primary one used
+            # for reporting the state of indexing.
+            if primary_doc_idx_insertion_records is None:
+                primary_doc_idx_insertion_records = insertion_records
+            if primary_doc_idx_vector_db_write_failures is None:
+                primary_doc_idx_vector_db_write_failures = vector_db_write_failures
+
+        adapter.post_index(
+            context=context,
+            updatable_chunk_data=updatable_chunk_data,
+            filtered_documents=filtered_documents,
+            result=result,
+        )

    assert primary_doc_idx_insertion_records is not None
    assert primary_doc_idx_vector_db_write_failures is not None
    return IndexingPipelineResult(
-        new_docs=sum(
-            1 for r in primary_doc_idx_insertion_records if not r.already_existed
+        new_docs=len(
+            [r for r in primary_doc_idx_insertion_records if not r.already_existed]
        ),
        total_docs=len(filtered_documents),
-        total_chunks=len(embedding_result.successful_chunk_ids),
-        failures=primary_doc_idx_vector_db_write_failures
-        + embedding_result.connector_failures,
+        total_chunks=len(chunks_with_embeddings),
+        failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
    )


--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -235,16 +235,12 @@ class UpdatableChunkData(BaseModel):
    boost_score: float


-class ChunkEnrichmentContext(Protocol):
-    """Returned by prepare_enrichment. Holds pre-computed metadata lookups
-    and provides per-chunk enrichment."""
-
+class BuildMetadataAwareChunksResult(BaseModel):
+    chunks: list[DocMetadataAwareIndexChunk]
    doc_id_to_previous_chunk_cnt: dict[str, int]
    doc_id_to_new_chunk_cnt: dict[str, int]
-
-    def enrich_chunk(
-        self, chunk: IndexChunk, score: float
-    ) -> DocMetadataAwareIndexChunk: ...
+    user_file_id_to_raw_text: dict[str, str]
+    user_file_id_to_token_count: dict[str, int | None]


 class IndexingBatchAdapter(Protocol):
@@ -258,24 +254,18 @@ class IndexingBatchAdapter(Protocol):
    ) -> Generator[TransactionalContext, None, None]:
        """Provide a transaction/row-lock context for critical updates."""

-    def prepare_enrichment(
+    def build_metadata_aware_chunks(
        self,
-        context: "DocumentBatchPrepareContext",
+        chunks_with_embeddings: list[IndexChunk],
+        chunk_content_scores: list[float],
        tenant_id: str,
-        chunks: list[DocAwareChunk],
-    ) -> ChunkEnrichmentContext:
-        """Prepare per-chunk enrichment data (access, document sets, boost, etc.).
-
-        Precondition: ``chunks`` have already been through the embedding step
-        (i.e. they are ``IndexChunk`` instances with populated embeddings,
-        passed here as the base ``DocAwareChunk`` type).
-        """
-        ...
+        context: "DocumentBatchPrepareContext",
+    ) -> BuildMetadataAwareChunksResult: ...

    def post_index(
        self,
        context: "DocumentBatchPrepareContext",
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        enrichment: ChunkEnrichmentContext,
+        result: BuildMetadataAwareChunksResult,
    ) -> None: ...
--- a/backend/onyx/indexing/vector_db_insertion.py
+++ b/backend/onyx/indexing/vector_db_insertion.py
@@ -1,9 +1,6 @@
 import time
-from collections.abc import Callable
-from collections.abc import Iterable
+from collections import defaultdict
 from http import HTTPStatus
-from itertools import chain
-from itertools import groupby

 import httpx

@@ -31,22 +28,22 @@ def _log_insufficient_storage_error(e: Exception) -> None:

 def write_chunks_to_vector_db_with_backoff(
    document_index: DocumentIndex,
-    make_chunks: Callable[[], Iterable[DocMetadataAwareIndexChunk]],
+    chunks: list[DocMetadataAwareIndexChunk],
    index_batch_params: IndexBatchParams,
 ) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:
    """Tries to insert all chunks in one large batch. If that batch fails for any reason,
    goes document by document to isolate the failure(s).

    IMPORTANT: must pass in whole documents at a time not individual chunks, since the
-    vector DB interface assumes that all chunks for a single document are present. The
-    chunks must also be in contiguous batches
+    vector DB interface assumes that all chunks for a single document are present.
    """
+
    # first try to write the chunks to the vector db
    try:
        return (
            list(
                document_index.index(
-                    chunks=make_chunks(),
+                    chunks=chunks,
                    index_batch_params=index_batch_params,
                )
            ),
@@ -63,23 +60,14 @@ def write_chunks_to_vector_db_with_backoff(
        # wait a couple seconds just to give the vector db a chance to recover
        time.sleep(2)

+    # try writing each doc one by one
+    chunks_for_docs: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(list)
+    for chunk in chunks:
+        chunks_for_docs[chunk.source_document.id].append(chunk)
+
    insertion_records: list[DocumentInsertionRecord] = []
    failures: list[ConnectorFailure] = []
-
-    def key(chunk: DocMetadataAwareIndexChunk) -> str:
-        return chunk.source_document.id
-
-    seen_doc_ids: set[str] = set()
-    for doc_id, chunks_for_doc in groupby(make_chunks(), key=key):
-        if doc_id in seen_doc_ids:
-            raise RuntimeError(
-                f"Doc chunks are not arriving in order. Current doc_id={doc_id}, seen_doc_ids={list(seen_doc_ids)}"
-            )
-        seen_doc_ids.add(doc_id)
-
-        first_chunk = next(chunks_for_doc)
-        chunks_for_doc = chain([first_chunk], chunks_for_doc)
-
+    for doc_id, chunks_for_doc in chunks_for_docs.items():
        try:
            insertion_records.extend(
                document_index.index(
@@ -99,7 +87,9 @@ def write_chunks_to_vector_db_with_backoff(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
-                        document_link=first_chunk.get_link(),
+                        document_link=(
+                            chunks_for_doc[0].get_link() if chunks_for_doc else None
+                        ),
                    ),
                    failure_message=str(e),
                    exception=e,
--- a/backend/onyx/llm/constants.py
+++ b/backend/onyx/llm/constants.py
@@ -26,6 +26,7 @@ class LlmProviderNames(str, Enum):
    MISTRAL = "mistral"
    LITELLM_PROXY = "litellm_proxy"
    BIFROST = "bifrost"
+    OPENAI_COMPATIBLE = "openai_compatible"

    def __str__(self) -> str:
        """Needed so things like:
@@ -46,6 +47,7 @@ WELL_KNOWN_PROVIDER_NAMES = [
    LlmProviderNames.LM_STUDIO,
    LlmProviderNames.LITELLM_PROXY,
    LlmProviderNames.BIFROST,
+    LlmProviderNames.OPENAI_COMPATIBLE,
 ]


@@ -64,6 +66,7 @@ PROVIDER_DISPLAY_NAMES: dict[str, str] = {
    LlmProviderNames.LM_STUDIO: "LM Studio",
    LlmProviderNames.LITELLM_PROXY: "LiteLLM Proxy",
    LlmProviderNames.BIFROST: "Bifrost",
+    LlmProviderNames.OPENAI_COMPATIBLE: "OpenAI Compatible",
    "groq": "Groq",
    "anyscale": "Anyscale",
    "deepseek": "DeepSeek",
@@ -116,6 +119,7 @@ AGGREGATOR_PROVIDERS: set[str] = {
    LlmProviderNames.AZURE,
    LlmProviderNames.LITELLM_PROXY,
    LlmProviderNames.BIFROST,
+    LlmProviderNames.OPENAI_COMPATIBLE,
 }

 # Model family name mappings for display name generation
--- a/backend/onyx/llm/multi_llm.py
+++ b/backend/onyx/llm/multi_llm.py
@@ -175,6 +175,28 @@ def _strip_tool_content_from_messages(
    return result


+def _fix_tool_user_message_ordering(
+    messages: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Insert a synthetic assistant message between tool and user messages.
+
+    Some models (e.g. Mistral on Azure) require strict message ordering where
+    a user message cannot immediately follow a tool message. This function
+    inserts a minimal assistant message to bridge the gap.
+    """
+    if len(messages) < 2:
+        return messages
+
+    result: list[dict[str, Any]] = [messages[0]]
+    for msg in messages[1:]:
+        prev_role = result[-1].get("role")
+        curr_role = msg.get("role")
+        if prev_role == "tool" and curr_role == "user":
+            result.append({"role": "assistant", "content": "Noted. Continuing."})
+        result.append(msg)
+    return result
+
+
 def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
    """Check if any messages contain tool-related content blocks."""
    for msg in messages:
@@ -305,12 +327,19 @@ class LitellmLLM(LLM):
        ):
            model_kwargs[VERTEX_LOCATION_KWARG] = "global"

-        # Bifrost: OpenAI-compatible proxy that expects model names in
-        # provider/model format (e.g. "anthropic/claude-sonnet-4-6").
-        # We route through LiteLLM's openai provider with the Bifrost base URL,
-        # and ensure /v1 is appended.
-        if model_provider == LlmProviderNames.BIFROST:
+        # Bifrost and generic OpenAI-compatible servers: proxies that expect
+        # model names sent directly to their endpoint. We route through LiteLLM's
+        # openai provider with the server's base URL, and ensure /v1 is appended.
+        if model_provider in (
+            LlmProviderNames.BIFROST,
+            LlmProviderNames.OPENAI_COMPATIBLE,
+        ):
            self._custom_llm_provider = "openai"
+            # LiteLLM's OpenAI client requires an api_key to be set.
+            # Many OpenAI-compatible servers don't need auth, so supply a
+            # placeholder to prevent LiteLLM from raising AuthenticationError.
+            if not self._api_key:
+                model_kwargs.setdefault("api_key", "not-needed")
            if self._api_base is not None:
                base = self._api_base.rstrip("/")
                self._api_base = base if base.endswith("/v1") else f"{base}/v1"
@@ -427,17 +456,20 @@ class LitellmLLM(LLM):
        optional_kwargs: dict[str, Any] = {}

        # Model name
-        is_bifrost = self._model_provider == LlmProviderNames.BIFROST
+        is_openai_compatible_proxy = self._model_provider in (
+            LlmProviderNames.BIFROST,
+            LlmProviderNames.OPENAI_COMPATIBLE,
+        )
        model_provider = (
            f"{self.config.model_provider}/responses"
            if is_openai_model  # Uses litellm's completions -> responses bridge
            else self.config.model_provider
        )
-        if is_bifrost:
-            # Bifrost expects model names in provider/model format
-            # (e.g. "anthropic/claude-sonnet-4-6") sent directly to its
-            # OpenAI-compatible endpoint. We use custom_llm_provider="openai"
-            # so LiteLLM doesn't try to route based on the provider prefix.
+        if is_openai_compatible_proxy:
+            # OpenAI-compatible proxies (Bifrost, generic OpenAI-compatible
+            # servers) expect model names sent directly to their endpoint.
+            # We use custom_llm_provider="openai" so LiteLLM doesn't try
+            # to route based on the provider prefix.
            model = self.config.deployment_name or self.config.model_name
        else:
            model = f"{model_provider}/{self.config.deployment_name or self.config.model_name}"
@@ -528,7 +560,10 @@ class LitellmLLM(LLM):
        if structured_response_format:
            optional_kwargs["response_format"] = structured_response_format

-        if not (is_claude_model or is_ollama or is_mistral) or is_bifrost:
+        if (
+            not (is_claude_model or is_ollama or is_mistral)
+            or is_openai_compatible_proxy
+        ):
            # Litellm bug: tool_choice is dropped silently if not specified here for OpenAI
            # However, this param breaks Anthropic and Mistral models,
            # so it must be conditionally included unless the request is
@@ -576,6 +611,18 @@ class LitellmLLM(LLM):
                ):
                    messages = _strip_tool_content_from_messages(messages)

+                # Some models (e.g. Mistral) reject a user message
+                # immediately after a tool message. Insert a synthetic
+                # assistant bridge message to satisfy the ordering
+                # constraint. Check both the provider and the deployment/
+                # model name to catch Mistral hosted on Azure.
+                model_or_deployment = (
+                    self._deployment_name or self._model_version or ""
+                ).lower()
+                is_mistral_model = is_mistral or "mistral" in model_or_deployment
+                if is_mistral_model:
+                    messages = _fix_tool_user_message_ordering(messages)
+
                # Only pass tool_choice when tools are present — some providers (e.g. Fireworks)
                # reject requests where tool_choice is explicitly null.
                if tools and tool_choice is not None:
--- a/backend/onyx/llm/override_models.py
+++ b/backend/onyx/llm/override_models.py
@@ -8,24 +8,6 @@ from pydantic import BaseModel


 class LLMOverride(BaseModel):
-    """Per-request LLM settings that override persona defaults.
-
-    All fields are optional — only the fields that differ from the persona's
-    configured LLM need to be supplied. Used both over the wire (API requests)
-    and for multi-model comparison, where one override is supplied per model.
-
-    Attributes:
-        model_provider: LLM provider slug (e.g. ``"openai"``, ``"anthropic"``).
-            When ``None``, the persona's default provider is used.
-        model_version: Specific model version string (e.g. ``"gpt-4o"``).
-            When ``None``, the persona's default model is used.
-        temperature: Sampling temperature in ``[0, 2]``. When ``None``, the
-            persona's default temperature is used.
-        display_name: Human-readable label shown in the UI for this model,
-            e.g. ``"GPT-4 Turbo"``. Optional; falls back to ``model_version``
-            when not set.
-    """
-
    model_provider: str | None = None
    model_version: str | None = None
    temperature: float | None = None
--- a/backend/onyx/llm/well_known_providers/constants.py
+++ b/backend/onyx/llm/well_known_providers/constants.py
@@ -15,6 +15,8 @@ LITELLM_PROXY_PROVIDER_NAME = "litellm_proxy"

 BIFROST_PROVIDER_NAME = "bifrost"

+OPENAI_COMPATIBLE_PROVIDER_NAME = "openai_compatible"
+
 # Providers that use optional Bearer auth from custom_config
 PROVIDERS_WITH_SPECIAL_API_KEY_HANDLING: dict[str, str] = {
    LlmProviderNames.OLLAMA_CHAT: OLLAMA_API_KEY_CONFIG_KEY,
--- a/backend/onyx/llm/well_known_providers/llm_provider_options.py
+++ b/backend/onyx/llm/well_known_providers/llm_provider_options.py
@@ -19,6 +19,7 @@ from onyx.llm.well_known_providers.constants import BIFROST_PROVIDER_NAME
 from onyx.llm.well_known_providers.constants import LITELLM_PROXY_PROVIDER_NAME
 from onyx.llm.well_known_providers.constants import LM_STUDIO_PROVIDER_NAME
 from onyx.llm.well_known_providers.constants import OLLAMA_PROVIDER_NAME
+from onyx.llm.well_known_providers.constants import OPENAI_COMPATIBLE_PROVIDER_NAME
 from onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME
 from onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME
 from onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME
@@ -51,6 +52,7 @@ def _get_provider_to_models_map() -> dict[str, list[str]]:
        OPENROUTER_PROVIDER_NAME: [],  # Dynamic - fetched from OpenRouter API
        LITELLM_PROXY_PROVIDER_NAME: [],  # Dynamic - fetched from LiteLLM proxy API
        BIFROST_PROVIDER_NAME: [],  # Dynamic - fetched from Bifrost API
+        OPENAI_COMPATIBLE_PROVIDER_NAME: [],  # Dynamic - fetched from OpenAI-compatible API
    }


@@ -336,6 +338,7 @@ def get_provider_display_name(provider_name: str) -> str:
        VERTEXAI_PROVIDER_NAME: "Google Vertex AI",
        OPENROUTER_PROVIDER_NAME: "OpenRouter",
        LITELLM_PROXY_PROVIDER_NAME: "LiteLLM Proxy",
+        OPENAI_COMPATIBLE_PROVIDER_NAME: "OpenAI Compatible",
    }

    if provider_name in _ONYX_PROVIDER_DISPLAY_NAMES:
--- a/backend/onyx/mcp_server_main.py
+++ b/backend/onyx/mcp_server_main.py
@@ -6,6 +6,7 @@ from onyx.configs.app_configs import MCP_SERVER_ENABLED
 from onyx.configs.app_configs import MCP_SERVER_HOST
 from onyx.configs.app_configs import MCP_SERVER_PORT
 from onyx.utils.logger import setup_logger
+from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable

 logger = setup_logger()

@@ -16,6 +17,7 @@ def main() -> None:
        logger.info("MCP server is disabled (MCP_SERVER_ENABLED=false)")
        return

+    set_is_ee_based_on_env_variable()
    logger.info(f"Starting MCP server on {MCP_SERVER_HOST}:{MCP_SERVER_PORT}")

    from onyx.mcp_server.api import mcp_app
--- a/backend/onyx/natural_language_processing/utils.py
+++ b/backend/onyx/natural_language_processing/utils.py
@@ -175,32 +175,6 @@ def get_tokenizer(
    return _check_tokenizer_cache(provider_type, model_name)


-# Max characters per encode() call.
-_ENCODE_CHUNK_SIZE = 500_000
-
-
-def count_tokens(
-    text: str,
-    tokenizer: BaseTokenizer,
-    token_limit: int | None = None,
-) -> int:
-    """Count tokens, chunking the input to avoid tiktoken stack overflow.
-
-    If token_limit is provided and the text is large enough to require
-    multiple chunks (> 500k chars), stops early once the count exceeds it.
-    When early-exiting, the returned value exceeds token_limit but may be
-    less than the true full token count.
-    """
-    if len(text) <= _ENCODE_CHUNK_SIZE:
-        return len(tokenizer.encode(text))
-    total = 0
-    for start in range(0, len(text), _ENCODE_CHUNK_SIZE):
-        total += len(tokenizer.encode(text[start : start + _ENCODE_CHUNK_SIZE]))
-        if token_limit is not None and total > token_limit:
-            return total  # Already over — skip remaining chunks
-    return total
-
-
 def tokenizer_trim_content(
    content: str, desired_length: int, tokenizer: BaseTokenizer
 ) -> str:
--- a/backend/onyx/server/features/build/api/user_library.py
+++ b/backend/onyx/server/features/build/api/user_library.py
@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session

 from onyx.auth.users import current_user
 from onyx.background.celery.versioned_apps.client import app as celery_app
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -50,6 +52,9 @@ from onyx.db.engine.sql_engine import get_session
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.models import User
 from onyx.document_index.interfaces import DocumentMetadata
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
@@ -127,6 +132,49 @@ class DeleteFileResponse(BaseModel):
 # =============================================================================


+def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
+    """True if either the filename or the content-type indicates a PDF.
+
+    Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
+    ``Content-Type: application/octet-stream``), so we also fall back to
+    extension-based detection via ``mimetypes.guess_type`` on the filename.
+    """
+    if content_type == "application/pdf":
+        return True
+    guessed, _ = mimetypes.guess_type(filename)
+    return guessed == "application/pdf"
+
+
+def _check_pdf_image_caps(
+    filename: str, content: bytes, content_type: str | None, batch_total: int
+) -> int:
+    """Enforce per-file and per-batch embedded-image caps for PDFs.
+
+    Returns the number of embedded images in this file (0 for non-PDFs) so
+    callers can update their running batch total. Raises OnyxError(INVALID_INPUT)
+    if either cap is exceeded.
+    """
+    if not _looks_like_pdf(filename, content_type):
+        return 0
+    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
+    # Short-circuit at the larger cap so we get a useful count for both checks.
+    count = count_pdf_embedded_images(BytesIO(content), max(file_cap, batch_cap))
+    if count > file_cap:
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            f"PDF '{filename}' contains too many embedded images "
+            f"(more than {file_cap}). Try splitting the document into smaller files.",
+        )
+    if batch_total + count > batch_cap:
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            f"Upload would exceed the {batch_cap}-image limit across all "
+            f"files in this batch. Try uploading fewer image-heavy files at once.",
+        )
+    return count
+
+
 def _sanitize_path(path: str) -> str:
    """Sanitize a file path, removing traversal attempts and normalizing.

@@ -355,6 +403,7 @@ async def upload_files(

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
+    batch_image_total = 0
    now = datetime.now(timezone.utc)

    # Sanitize the base path
@@ -374,6 +423,14 @@ async def upload_files(
                detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB",
            )

+        # Reject PDFs with an unreasonable per-file or per-batch image count
+        batch_image_total += _check_pdf_image_caps(
+            filename=file.filename or "unnamed",
+            content=content,
+            content_type=file.content_type,
+            batch_total=batch_image_total,
+        )
+
        # Validate cumulative storage (existing + this upload batch)
        total_size += file_size
        if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
@@ -472,6 +529,7 @@ async def upload_zip(

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
+    batch_image_total = 0

    # Extract zip contents into a subfolder named after the zip file
    zip_name = api_sanitize_filename(file.filename or "upload")
@@ -510,6 +568,36 @@ async def upload_zip(
                    logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
                    continue

+                # Skip PDFs that would trip the per-file or per-batch image
+                # cap (such PDFs can OOM the user-file-processing worker). Same
+                # caps as /upload, but uses skip-and-warn instead of raising, to
+                # stay consistent with the zip path's handling of oversized files.
+                zip_file_name = zip_info.filename.split("/")[-1]
+                zip_content_type, _ = mimetypes.guess_type(zip_file_name)
+                if zip_content_type == "application/pdf":
+                    image_count = count_pdf_embedded_images(
+                        BytesIO(file_content),
+                        max(
+                            MAX_EMBEDDED_IMAGES_PER_FILE,
+                            MAX_EMBEDDED_IMAGES_PER_UPLOAD,
+                        ),
+                    )
+                    if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
+                        logger.warning(
+                            "Skipping '%s' - exceeds %d per-file embedded-image cap",
+                            zip_info.filename,
+                            MAX_EMBEDDED_IMAGES_PER_FILE,
+                        )
+                        continue
+                    if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
+                        logger.warning(
+                            "Skipping '%s' - would exceed %d per-batch embedded-image cap",
+                            zip_info.filename,
+                            MAX_EMBEDDED_IMAGES_PER_UPLOAD,
+                        )
+                        continue
+                    batch_image_total += image_count
+
                total_size += file_size

                # Validate cumulative storage
--- a/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
@@ -3844,9 +3844,9 @@
      }
    },
    "node_modules/@ts-morph/common/node_modules/brace-expansion": {
-      "version": "5.0.5",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
-      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
+      "version": "5.0.3",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz",
+      "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==",
      "license": "MIT",
      "dependencies": {
        "balanced-match": "^4.0.2"
@@ -4224,9 +4224,9 @@
      }
    },
    "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.3.tgz",
-      "integrity": "sha512-MCV/fYJEbqx68aE58kv2cA/kiky1G8vux3OR6/jbS+jIMe/6fJWa0DTzJU7dqijOWYwHi1t29FlfYI9uytqlpA==",
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
+      "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -5007,9 +5007,9 @@
      }
    },
    "node_modules/brace-expansion": {
-      "version": "1.1.13",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz",
-      "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==",
+      "version": "1.1.12",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
--- a/backend/onyx/server/features/hooks/api.py
+++ b/backend/onyx/server/features/hooks/api.py
@@ -123,8 +123,9 @@ def _validate_endpoint(
    (not reachable — indicates the api_key is invalid).

    Timeout handling:
-    - Any httpx.TimeoutException (ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout) →
-      timeout (operator should consider increasing timeout_seconds).
+    - ConnectTimeout: TCP handshake never completed → cannot_connect.
+    - ReadTimeout / WriteTimeout: TCP was established, server responded slowly → timeout
+      (operator should consider increasing timeout_seconds).
    - All other exceptions → cannot_connect.
    """
    _check_ssrf_safety(endpoint_url)
--- a/backend/onyx/server/features/projects/projects_file_utils.py
+++ b/backend/onyx/server/features/projects/projects_file_utils.py
@@ -9,15 +9,23 @@ from pydantic import ConfigDict
 from pydantic import Field
 from sqlalchemy.orm import Session

+from onyx.configs.app_configs import FILE_TOKEN_COUNT_THRESHOLD
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
+from onyx.configs.app_configs import USER_FILE_MAX_UPLOAD_SIZE_BYTES
+from onyx.configs.app_configs import USER_FILE_MAX_UPLOAD_SIZE_MB
 from onyx.db.llm import fetch_default_llm_model
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_processing.extract_file_text import get_file_ext
 from onyx.file_processing.file_types import OnyxFileExtensions
 from onyx.file_processing.password_validation import is_file_password_protected
-from onyx.natural_language_processing.utils import count_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
-from onyx.server.settings.store import load_settings
 from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+from shared_configs.configs import SKIP_USERFILE_THRESHOLD
+from shared_configs.configs import SKIP_USERFILE_THRESHOLD_TENANT_LIST
+from shared_configs.contextvars import get_current_tenant_id


 logger = setup_logger()
@@ -156,8 +164,8 @@ def categorize_uploaded_files(
      document formats (.pdf, .docx, …) and falls back to a text-detection
      heuristic for unknown extensions (.py, .js, .rs, …).
    - Uses default tokenizer to compute token length.
-    - If token length exceeds the admin-configured threshold, reject file.
-    - If extension unsupported or text cannot be extracted, reject file.
+    - If token length > threshold, reject file (unless threshold skip is enabled).
+    - If text cannot be extracted, reject file.
    - Otherwise marked as acceptable.
    """

@@ -168,33 +176,41 @@ def categorize_uploaded_files(
    provider_type = default_model.llm_provider.provider if default_model else None
    tokenizer = get_tokenizer(model_name=model_name, provider_type=provider_type)

-    # Derive limits from admin-configurable settings.
-    # For upload size: load_settings() resolves 0/None to a positive default.
-    # For token threshold: 0 means "no limit" (converted to None below).
-    settings = load_settings()
-    max_upload_size_mb = (
-        settings.user_file_max_upload_size_mb
-    )  # always positive after load_settings()
-    max_upload_size_bytes = (
-        max_upload_size_mb * 1024 * 1024 if max_upload_size_mb else None
-    )
-    token_threshold_k = settings.file_token_count_threshold_k
-    token_threshold = (
-        token_threshold_k * 1000 if token_threshold_k else None
-    )  # 0 → None = no limit
+    # Check if threshold checks should be skipped
+    skip_threshold = False
+
+    # Check global skip flag (works for both single-tenant and multi-tenant)
+    if SKIP_USERFILE_THRESHOLD:
+        skip_threshold = True
+        logger.info("Skipping userfile threshold check (global setting)")
+    # Check tenant-specific skip list (only applicable in multi-tenant)
+    elif MULTI_TENANT and SKIP_USERFILE_THRESHOLD_TENANT_LIST:
+        try:
+            current_tenant_id = get_current_tenant_id()
+            skip_threshold = current_tenant_id in SKIP_USERFILE_THRESHOLD_TENANT_LIST
+            if skip_threshold:
+                logger.info(
+                    f"Skipping userfile threshold check for tenant: {current_tenant_id}"
+                )
+        except RuntimeError as e:
+            logger.warning(f"Failed to get current tenant ID: {str(e)}")
+
+    # Running total of embedded images across PDFs in this batch. Once the
+    # aggregate cap is reached, subsequent PDFs in the same upload are
+    # rejected even if they'd individually fit under MAX_EMBEDDED_IMAGES_PER_FILE.
+    batch_image_total = 0

    for upload in files:
        try:
            filename = get_safe_filename(upload)

-            # Size limit is a hard safety cap.
-            if max_upload_size_bytes is not None and is_upload_too_large(
-                upload, max_upload_size_bytes
-            ):
+            # Size limit is a hard safety cap and is enforced even when token
+            # threshold checks are skipped via SKIP_USERFILE_THRESHOLD settings.
+            if is_upload_too_large(upload, USER_FILE_MAX_UPLOAD_SIZE_BYTES):
                results.rejected.append(
                    RejectedFile(
                        filename=filename,
-                        reason=f"Exceeds {max_upload_size_mb} MB file size limit",
+                        reason=f"Exceeds {USER_FILE_MAX_UPLOAD_SIZE_MB} MB file size limit",
                    )
                )
                continue
@@ -216,11 +232,11 @@ def categorize_uploaded_files(
                    )
                    continue

-                if token_threshold is not None and token_count > token_threshold:
+                if not skip_threshold and token_count > FILE_TOKEN_COUNT_THRESHOLD:
                    results.rejected.append(
                        RejectedFile(
                            filename=filename,
-                            reason=f"Exceeds {token_threshold_k}K token limit",
+                            reason=f"Exceeds {FILE_TOKEN_COUNT_THRESHOLD} token limit",
                        )
                    )
                else:
@@ -245,6 +261,47 @@ def categorize_uploaded_files(
                    )
                    continue

+                # Reject PDFs with an unreasonable number of embedded images
+                # (either per-file or accumulated across this upload batch).
+                # A PDF with thousands of embedded images can OOM the
+                # user-file-processing celery worker because every image is
+                # decoded with PIL and then sent to the vision LLM.
+                if extension == ".pdf":
+                    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+                    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
+                    # Use the larger of the two caps as the short-circuit
+                    # threshold so we get a useful count for both checks.
+                    # count_pdf_embedded_images restores the stream position.
+                    count = count_pdf_embedded_images(
+                        upload.file, max(file_cap, batch_cap)
+                    )
+                    if count > file_cap:
+                        results.rejected.append(
+                            RejectedFile(
+                                filename=filename,
+                                reason=(
+                                    f"PDF contains too many embedded images "
+                                    f"(more than {file_cap}). Try splitting "
+                                    f"the document into smaller files."
+                                ),
+                            )
+                        )
+                        continue
+                    if batch_image_total + count > batch_cap:
+                        results.rejected.append(
+                            RejectedFile(
+                                filename=filename,
+                                reason=(
+                                    f"Upload would exceed the "
+                                    f"{batch_cap}-image limit across all "
+                                    f"files in this batch. Try uploading "
+                                    f"fewer image-heavy files at once."
+                                ),
+                            )
+                        )
+                        continue
+                    batch_image_total += count
+
                text_content = extract_file_text(
                    file=upload.file,
                    file_name=filename,
@@ -261,14 +318,12 @@ def categorize_uploaded_files(
                    )
                    continue

-                token_count = count_tokens(
-                    text_content, tokenizer, token_limit=token_threshold
-                )
-                if token_threshold is not None and token_count > token_threshold:
+                token_count = len(tokenizer.encode(text_content))
+                if not skip_threshold and token_count > FILE_TOKEN_COUNT_THRESHOLD:
                    results.rejected.append(
                        RejectedFile(
                            filename=filename,
-                            reason=f"Exceeds {token_threshold_k}K token limit",
+                            reason=f"Exceeds {FILE_TOKEN_COUNT_THRESHOLD} token limit",
                        )
                    )
                else:
--- a/backend/onyx/server/manage/llm/api.py
+++ b/backend/onyx/server/manage/llm/api.py
@@ -74,6 +74,8 @@ from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
 from onyx.server.manage.llm.models import OllamaFinalModelResponse
 from onyx.server.manage.llm.models import OllamaModelDetails
 from onyx.server.manage.llm.models import OllamaModelsRequest
+from onyx.server.manage.llm.models import OpenAICompatibleFinalModelResponse
+from onyx.server.manage.llm.models import OpenAICompatibleModelsRequest
 from onyx.server.manage.llm.models import OpenRouterFinalModelResponse
 from onyx.server.manage.llm.models import OpenRouterModelDetails
 from onyx.server.manage.llm.models import OpenRouterModelsRequest
@@ -1575,3 +1577,95 @@ def _get_bifrost_models_response(api_base: str, api_key: str | None = None) -> d
        source_name="Bifrost",
        api_key=api_key,
    )
+
+
+@admin_router.post("/openai-compatible/available-models")
+def get_openai_compatible_server_available_models(
+    request: OpenAICompatibleModelsRequest,
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> list[OpenAICompatibleFinalModelResponse]:
+    """Fetch available models from a generic OpenAI-compatible /v1/models endpoint."""
+    response_json = _get_openai_compatible_server_response(
+        api_base=request.api_base, api_key=request.api_key
+    )
+
+    models = response_json.get("data", [])
+    if not isinstance(models, list) or len(models) == 0:
+        raise OnyxError(
+            OnyxErrorCode.VALIDATION_ERROR,
+            "No models found from your OpenAI-compatible endpoint",
+        )
+
+    results: list[OpenAICompatibleFinalModelResponse] = []
+    for model in models:
+        try:
+            model_id = model.get("id", "")
+            model_name = model.get("name", model_id)
+
+            if not model_id:
+                continue
+
+            # Skip embedding models
+            if is_embedding_model(model_id):
+                continue
+
+            results.append(
+                OpenAICompatibleFinalModelResponse(
+                    name=model_id,
+                    display_name=model_name,
+                    max_input_tokens=model.get("context_length"),
+                    supports_image_input=infer_vision_support(model_id),
+                    supports_reasoning=is_reasoning_model(model_id, model_name),
+                )
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to parse OpenAI-compatible model entry",
+                extra={"error": str(e), "item": str(model)[:1000]},
+            )
+
+    if not results:
+        raise OnyxError(
+            OnyxErrorCode.VALIDATION_ERROR,
+            "No compatible models found from OpenAI-compatible endpoint",
+        )
+
+    sorted_results = sorted(results, key=lambda m: m.name.lower())
+
+    # Sync new models to DB if provider_name is specified
+    if request.provider_name:
+        _sync_fetched_models(
+            db_session=db_session,
+            provider_name=request.provider_name,
+            models=[
+                SyncModelEntry(
+                    name=r.name,
+                    display_name=r.display_name,
+                    max_input_tokens=r.max_input_tokens,
+                    supports_image_input=r.supports_image_input,
+                )
+                for r in sorted_results
+            ],
+            source_label="OpenAI Compatible",
+        )
+
+    return sorted_results
+
+
+def _get_openai_compatible_server_response(
+    api_base: str, api_key: str | None = None
+) -> dict:
+    """Perform GET to an OpenAI-compatible /v1/models and return parsed JSON."""
+    cleaned_api_base = api_base.strip().rstrip("/")
+    # Ensure we hit /v1/models
+    if cleaned_api_base.endswith("/v1"):
+        url = f"{cleaned_api_base}/models"
+    else:
+        url = f"{cleaned_api_base}/v1/models"
+
+    return _get_openai_compatible_models_response(
+        url=url,
+        source_name="OpenAI Compatible",
+        api_key=api_key,
+    )
--- a/backend/onyx/server/manage/llm/models.py
+++ b/backend/onyx/server/manage/llm/models.py
@@ -464,3 +464,18 @@ class BifrostFinalModelResponse(BaseModel):
    max_input_tokens: int | None
    supports_image_input: bool
    supports_reasoning: bool
+
+
+# OpenAI Compatible dynamic models fetch
+class OpenAICompatibleModelsRequest(BaseModel):
+    api_base: str
+    api_key: str | None = None
+    provider_name: str | None = None  # Optional: to save models to existing provider
+
+
+class OpenAICompatibleFinalModelResponse(BaseModel):
+    name: str  # Model ID (e.g. "meta-llama/Llama-3-8B-Instruct")
+    display_name: str  # Human-readable name from API
+    max_input_tokens: int | None
+    supports_image_input: bool
+    supports_reasoning: bool
--- a/backend/onyx/server/manage/llm/utils.py
+++ b/backend/onyx/server/manage/llm/utils.py
@@ -26,6 +26,7 @@ DYNAMIC_LLM_PROVIDERS = frozenset(
        LlmProviderNames.OLLAMA_CHAT,
        LlmProviderNames.LM_STUDIO,
        LlmProviderNames.BIFROST,
+        LlmProviderNames.OPENAI_COMPATIBLE,
    }
 )

--- a/backend/onyx/server/manage/models.py
+++ b/backend/onyx/server/manage/models.py
@@ -147,6 +147,7 @@ class UserInfo(BaseModel):
        is_anonymous_user: bool | None = None,
        tenant_info: TenantInfo | None = None,
        assistant_specific_configs: UserSpecificAssistantPreferences | None = None,
+        memories: list[MemoryItem] | None = None,
    ) -> "UserInfo":
        return cls(
            id=str(user.id),
@@ -191,10 +192,7 @@ class UserInfo(BaseModel):
                role=user.personal_role or "",
                use_memories=user.use_memories,
                enable_memory_tool=user.enable_memory_tool,
-                memories=[
-                    MemoryItem(id=memory.id, content=memory.memory_text)
-                    for memory in (user.memories or [])
-                ],
+                memories=memories or [],
                user_preferences=user.user_preferences or "",
            ),
        )
--- a/backend/onyx/server/manage/users.py
+++ b/backend/onyx/server/manage/users.py
@@ -57,6 +57,7 @@ from onyx.db.user_preferences import activate_user
 from onyx.db.user_preferences import deactivate_user
 from onyx.db.user_preferences import get_all_user_assistant_specific_configs
 from onyx.db.user_preferences import get_latest_access_token_for_user
+from onyx.db.user_preferences import get_memories_for_user
 from onyx.db.user_preferences import update_assistant_preferences
 from onyx.db.user_preferences import update_user_assistant_visibility
 from onyx.db.user_preferences import update_user_auto_scroll
@@ -823,6 +824,11 @@ def verify_user_logged_in(
            [],
        ),
    )
+    memories = [
+        MemoryItem(id=memory.id, content=memory.memory_text)
+        for memory in get_memories_for_user(user.id, db_session)
+    ]
+
    user_info = UserInfo.from_model(
        user,
        current_token_created_at=token_created_at,
@@ -833,6 +839,7 @@ def verify_user_logged_in(
            new_tenant=new_tenant,
            invitation=tenant_invitation,
        ),
+        memories=memories,
    )

    return user_info
@@ -930,7 +937,8 @@ def update_user_personalization_api(
        else user.enable_memory_tool
    )
    existing_memories = [
-        MemoryItem(id=memory.id, content=memory.memory_text) for memory in user.memories
+        MemoryItem(id=memory.id, content=memory.memory_text)
+        for memory in get_memories_for_user(user.id, db_session)
    ]
    new_memories = (
        request.memories if request.memories is not None else existing_memories
--- a/backend/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/onyx/server/query_and_chat/chat_backend.py
@@ -28,7 +28,6 @@ from onyx.chat.chat_utils import extract_headers
 from onyx.chat.models import ChatFullResponse
 from onyx.chat.models import CreateChatSessionID
 from onyx.chat.process_message import gather_stream_full
-from onyx.chat.process_message import handle_multi_model_stream
 from onyx.chat.process_message import handle_stream_message_objects
 from onyx.chat.prompt_utils import get_default_base_system_prompt
 from onyx.chat.stop_signal_checker import set_fence
@@ -47,7 +46,6 @@ from onyx.db.chat import get_chat_messages_by_session
 from onyx.db.chat import get_chat_session_by_id
 from onyx.db.chat import get_chat_sessions_by_user
 from onyx.db.chat import set_as_latest_chat_message
-from onyx.db.chat import set_preferred_response
 from onyx.db.chat import translate_db_message_to_chat_message_detail
 from onyx.db.chat import update_chat_session
 from onyx.db.chat_search import search_chat_sessions
@@ -62,8 +60,6 @@ from onyx.db.persona import get_persona_by_id
 from onyx.db.usage import increment_usage
 from onyx.db.usage import UsageType
 from onyx.db.user_file import get_file_id_by_user_file_id
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.file_store.file_store import get_default_file_store
 from onyx.llm.constants import LlmProviderNames
 from onyx.llm.factory import get_default_llm
@@ -85,7 +81,6 @@ from onyx.server.query_and_chat.models import ChatSessionUpdateRequest
 from onyx.server.query_and_chat.models import MessageOrigin
 from onyx.server.query_and_chat.models import RenameChatSessionResponse
 from onyx.server.query_and_chat.models import SendMessageRequest
-from onyx.server.query_and_chat.models import SetPreferredResponseRequest
 from onyx.server.query_and_chat.models import UpdateChatSessionTemperatureRequest
 from onyx.server.query_and_chat.models import UpdateChatSessionThreadRequest
 from onyx.server.query_and_chat.session_loading import (
@@ -575,46 +570,6 @@ def handle_send_chat_message(
    if get_hashed_api_key_from_request(request) or get_hashed_pat_from_request(request):
        chat_message_req.origin = MessageOrigin.API

-    # Multi-model streaming path: 2-3 LLMs in parallel (streaming only)
-    is_multi_model = (
-        chat_message_req.llm_overrides is not None
-        and len(chat_message_req.llm_overrides) > 1
-    )
-    if is_multi_model and chat_message_req.stream:
-        # Narrowed here; is_multi_model already checked llm_overrides is not None
-        llm_overrides = chat_message_req.llm_overrides or []
-
-        def multi_model_stream_generator() -> Generator[str, None, None]:
-            try:
-                with get_session_with_current_tenant() as db_session:
-                    for obj in handle_multi_model_stream(
-                        new_msg_req=chat_message_req,
-                        user=user,
-                        db_session=db_session,
-                        llm_overrides=llm_overrides,
-                        litellm_additional_headers=extract_headers(
-                            request.headers, LITELLM_PASS_THROUGH_HEADERS
-                        ),
-                        custom_tool_additional_headers=get_custom_tool_additional_request_headers(
-                            request.headers
-                        ),
-                        mcp_headers=chat_message_req.mcp_headers,
-                    ):
-                        yield get_json_line(obj.model_dump())
-            except Exception as e:
-                logger.exception("Error in multi-model streaming")
-                yield json.dumps({"error": str(e)})
-
-        return StreamingResponse(
-            multi_model_stream_generator(), media_type="text/event-stream"
-        )
-
-    if is_multi_model and not chat_message_req.stream:
-        raise OnyxError(
-            OnyxErrorCode.INVALID_INPUT,
-            "Multi-model mode (llm_overrides with >1 entry) requires stream=True.",
-        )
-
    # Non-streaming path: consume all packets and return complete response
    if not chat_message_req.stream:
        with get_session_with_current_tenant() as db_session:
@@ -705,30 +660,6 @@ def set_message_as_latest(
    )


-@router.put("/set-preferred-response")
-def set_preferred_response_endpoint(
-    request_body: SetPreferredResponseRequest,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> None:
-    """Set the preferred assistant response for a multi-model turn."""
-    try:
-        # Ownership check: get_chat_message raises ValueError if the message
-        # doesn't belong to this user, preventing cross-user mutation.
-        get_chat_message(
-            chat_message_id=request_body.user_message_id,
-            user_id=user.id if user else None,
-            db_session=db_session,
-        )
-        set_preferred_response(
-            db_session=db_session,
-            user_message_id=request_body.user_message_id,
-            preferred_assistant_message_id=request_body.preferred_response_id,
-        )
-    except ValueError as e:
-        raise OnyxError(OnyxErrorCode.INVALID_INPUT, str(e))
-
-
@router.post("/create-chat-message-feedback")
 def create_chat_feedback(
    feedback: ChatFeedbackRequest,
--- a/backend/onyx/server/query_and_chat/placement.py
+++ b/backend/onyx/server/query_and_chat/placement.py
@@ -2,25 +2,11 @@ from pydantic import BaseModel


 class Placement(BaseModel):
-    """Coordinates that identify where a streaming packet belongs in the UI.
-
-    The frontend uses these fields to route each packet to the correct turn,
-    tool tab, agent sub-turn, and (in multi-model mode) response column.
-
-    Attributes:
-        turn_index: Monotonically increasing index of the iterative reasoning block
-            (e.g. tool call round) within this chat message. Lower values happened first.
-        tab_index: Disambiguates parallel tool calls within the same turn so each
-            tool's output can be displayed in its own tab.
-        sub_turn_index: Nesting level for tools that invoke other tools. ``None`` for
-            top-level packets; an integer for tool-within-tool output.
-        model_index: Which model this packet belongs to. ``0`` for single-model
-            responses; ``0``, ``1``, or ``2`` for multi-model comparison. ``None``
-            for pre-LLM setup packets (e.g. message ID info) that are yielded
-            before any Emitter runs.
-    """
-
+    # Which iterative block in the UI is this part of, these are ordered and smaller ones happened first
    turn_index: int
+    # For parallel tool calls to preserve order of execution
    tab_index: int = 0
+    # Used for tools/agents that call other tools, this currently doesn't support nested agents but can be added later
    sub_turn_index: int | None = None
+    # For multi-model streaming: identifies which model (0, 1, 2) this packet belongs to.
    model_index: int | None = None
--- a/backend/onyx/server/settings/api.py
+++ b/backend/onyx/server/settings/api.py
@@ -9,9 +9,7 @@ from onyx import __version__ as onyx_version
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_user
 from onyx.auth.users import is_user_admin
-from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
 from onyx.configs.app_configs import DISABLE_VECTOR_DB
-from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
 from onyx.configs.constants import KV_REINDEX_KEY
 from onyx.configs.constants import NotificationType
 from onyx.db.engine.sql_engine import get_session
@@ -19,16 +17,10 @@ from onyx.db.models import User
 from onyx.db.notification import dismiss_all_notifications
 from onyx.db.notification import get_notifications
 from onyx.db.notification import update_notification_last_shown
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.hooks.utils import HOOKS_AVAILABLE
 from onyx.key_value_store.factory import get_kv_store
 from onyx.key_value_store.interface import KvKeyNotFoundError
 from onyx.server.features.build.utils import is_onyx_craft_enabled
-from onyx.server.settings.models import (
-    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
-)
-from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
 from onyx.server.settings.models import Notification
 from onyx.server.settings.models import Settings
 from onyx.server.settings.models import UserSettings
@@ -49,15 +41,6 @@ basic_router = APIRouter(prefix="/settings")
 def admin_put_settings(
    settings: Settings, _: User = Depends(current_admin_user)
 ) -> None:
-    if (
-        settings.user_file_max_upload_size_mb is not None
-        and settings.user_file_max_upload_size_mb > 0
-        and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB
-    ):
-        raise OnyxError(
-            OnyxErrorCode.INVALID_INPUT,
-            f"File upload size limit cannot exceed {MAX_ALLOWED_UPLOAD_SIZE_MB} MB",
-        )
    store_settings(settings)


@@ -100,16 +83,6 @@ def fetch_settings(
        vector_db_enabled=not DISABLE_VECTOR_DB,
        hooks_enabled=HOOKS_AVAILABLE,
        version=onyx_version,
-        max_allowed_upload_size_mb=MAX_ALLOWED_UPLOAD_SIZE_MB,
-        default_user_file_max_upload_size_mb=min(
-            DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,
-            MAX_ALLOWED_UPLOAD_SIZE_MB,
-        ),
-        default_file_token_count_threshold_k=(
-            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
-            if DISABLE_VECTOR_DB
-            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
-        ),
    )


--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -2,19 +2,12 @@ from datetime import datetime
 from enum import Enum

 from pydantic import BaseModel
-from pydantic import Field

-from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
-from onyx.configs.app_configs import DISABLE_VECTOR_DB
-from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
 from onyx.configs.constants import NotificationType
 from onyx.configs.constants import QueryHistoryType
 from onyx.db.models import Notification as NotificationDBModel
 from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

-DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB = 200
-DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB = 10000
-

 class PageType(str, Enum):
    CHAT = "chat"
@@ -85,12 +78,7 @@ class Settings(BaseModel):

    # User Knowledge settings
    user_knowledge_enabled: bool | None = True
-    user_file_max_upload_size_mb: int | None = Field(
-        default=DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, ge=0
-    )
-    file_token_count_threshold_k: int | None = Field(
-        default=None, ge=0  # thousands of tokens; None = context-aware default
-    )
+    user_file_max_upload_size_mb: int | None = None

    # Connector settings
    show_extra_connectors: bool | None = True
@@ -120,14 +108,3 @@ class UserSettings(Settings):
    hooks_enabled: bool = False
    # Application version, read from the ONYX_VERSION env var at startup.
    version: str | None = None
-    # Hard ceiling for user_file_max_upload_size_mb, derived from env var.
-    max_allowed_upload_size_mb: int = MAX_ALLOWED_UPLOAD_SIZE_MB
-    # Factory defaults so the frontend can show a "restore default" button.
-    default_user_file_max_upload_size_mb: int = DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
-    default_file_token_count_threshold_k: int = Field(
-        default_factory=lambda: (
-            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
-            if DISABLE_VECTOR_DB
-            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
-        )
-    )
--- a/backend/onyx/server/settings/store.py
+++ b/backend/onyx/server/settings/store.py
@@ -1,19 +1,13 @@
 from onyx.cache.factory import get_cache_backend
-from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
 from onyx.configs.app_configs import DISABLE_USER_KNOWLEDGE
-from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
-from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
 from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
 from onyx.configs.app_configs import SHOW_EXTRA_CONNECTORS
+from onyx.configs.app_configs import USER_FILE_MAX_UPLOAD_SIZE_MB
 from onyx.configs.constants import KV_SETTINGS_KEY
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.key_value_store.factory import get_kv_store
 from onyx.key_value_store.interface import KvKeyNotFoundError
-from onyx.server.settings.models import (
-    DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
-)
-from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
 from onyx.server.settings.models import Settings
 from onyx.utils.logger import setup_logger

@@ -57,36 +51,9 @@ def load_settings() -> Settings:
    if DISABLE_USER_KNOWLEDGE:
        settings.user_knowledge_enabled = False

+    settings.user_file_max_upload_size_mb = USER_FILE_MAX_UPLOAD_SIZE_MB
    settings.show_extra_connectors = SHOW_EXTRA_CONNECTORS
    settings.opensearch_indexing_enabled = ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
-
-    # Resolve context-aware defaults for token threshold.
-    # None = admin hasn't set a value yet → use context-aware default.
-    # 0 = admin explicitly chose "no limit" → preserve as-is.
-    if settings.file_token_count_threshold_k is None:
-        settings.file_token_count_threshold_k = (
-            DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
-            if DISABLE_VECTOR_DB
-            else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
-        )
-
-    # Upload size: 0 and None are treated as "unset" (not "no limit") →
-    # fall back to min(configured default, hard ceiling).
-    if not settings.user_file_max_upload_size_mb:
-        settings.user_file_max_upload_size_mb = min(
-            DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,
-            MAX_ALLOWED_UPLOAD_SIZE_MB,
-        )
-
-    # Clamp to env ceiling so stale KV values are capped even if the
-    # operator lowered MAX_ALLOWED_UPLOAD_SIZE_MB after a higher value
-    # was already saved (api.py only guards new writes).
-    if (
-        settings.user_file_max_upload_size_mb > 0
-        and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB
-    ):
-        settings.user_file_max_upload_size_mb = MAX_ALLOWED_UPLOAD_SIZE_MB
-
    return settings


--- a/backend/onyx/tools/fake_tools/research_agent.py
+++ b/backend/onyx/tools/fake_tools/research_agent.py
@@ -1,4 +1,3 @@
-import queue
 import time
 from collections.abc import Callable
 from typing import Any
@@ -709,6 +708,7 @@ def run_research_agent_calls(


 if __name__ == "__main__":
+    from queue import Queue
    from uuid import uuid4

    from onyx.chat.chat_state import ChatStateContainer
@@ -744,8 +744,8 @@ if __name__ == "__main__":
        if user is None:
            raise ValueError("No users found in database. Please create a user first.")

-        emitter_queue: queue.Queue = queue.Queue()
-        emitter = Emitter(merged_queue=emitter_queue)
+        bus: Queue[Packet] = Queue()
+        emitter = Emitter(bus)
        state_container = ChatStateContainer()

        tool_dict = construct_tools(
@@ -792,4 +792,4 @@ if __name__ == "__main__":
            print(result.intermediate_report)
            print("=" * 80)
            print(f"Citations: {result.citation_mapping}")
-            print(f"Total packets emitted: {emitter_queue.qsize()}")
+            print(f"Total packets emitted: {bus.qsize()}")
--- a/backend/onyx/tools/tool_implementations/custom/custom_tool.py
+++ b/backend/onyx/tools/tool_implementations/custom/custom_tool.py
@@ -1,6 +1,5 @@
 import csv
 import json
-import queue
 import uuid
 from io import BytesIO
 from io import StringIO
@@ -12,6 +11,7 @@ import requests
 from requests import JSONDecodeError

 from onyx.chat.emitter import Emitter
+from onyx.chat.emitter import get_default_emitter
 from onyx.configs.constants import FileOrigin
 from onyx.file_store.file_store import get_default_file_store
 from onyx.server.query_and_chat.placement import Placement
@@ -296,9 +296,9 @@ def build_custom_tools_from_openapi_schema_and_headers(
    url = openapi_to_url(openapi_schema)
    method_specs = openapi_to_method_specs(openapi_schema)

-    # Use a discard emitter if none provided (packets go nowhere)
+    # Use default emitter if none provided
    if emitter is None:
-        emitter = Emitter(merged_queue=queue.Queue())
+        emitter = get_default_emitter()

    return [
        CustomTool(
@@ -367,7 +367,7 @@ if __name__ == "__main__":
    tools = build_custom_tools_from_openapi_schema_and_headers(
        tool_id=0,  # dummy tool id
        openapi_schema=openapi_schema,
-        emitter=Emitter(merged_queue=queue.Queue()),
+        emitter=get_default_emitter(),
        dynamic_schema_info=None,
    )

--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -187,7 +187,7 @@ coloredlogs==15.0.1
    # via onnxruntime
 courlan==1.3.2
    # via trafilatura
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   authlib
    #   google-auth
@@ -449,7 +449,7 @@ kombu==5.5.4
    # via celery
 kubernetes==31.0.0
    # via onyx
-langchain-core==1.2.22
+langchain-core==1.2.11
    # via onyx
 langdetect==1.0.9
    # via unstructured
@@ -735,7 +735,7 @@ pyee==13.0.0
    # via playwright
 pygithub==2.5.0
    # via onyx
-pygments==2.20.0
+pygments==2.19.2
    # via rich
 pyjwt==2.12.0
    # via
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -97,7 +97,7 @@ comm==0.2.3
    # via ipykernel
 contourpy==1.3.3
    # via matplotlib
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   google-auth
    #   pyjwt
@@ -263,7 +263,7 @@ oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
-onyx-devtools==0.7.2
+onyx-devtools==0.7.1
    # via onyx
 openai==2.14.0
    # via
@@ -349,7 +349,7 @@ pydantic-core==2.33.2
    # via pydantic
 pydantic-settings==2.12.0
    # via mcp
-pygments==2.20.0
+pygments==2.19.2
    # via
    #   ipython
    #   ipython-pygments-lexers
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -76,7 +76,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   google-auth
    #   pyjwt
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -92,7 +92,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   google-auth
    #   pyjwt
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -191,6 +191,25 @@ IGNORED_SYNCING_TENANT_LIST = (
    else None
 )

+# Global flag to skip userfile threshold for all users/tenants
+SKIP_USERFILE_THRESHOLD = (
+    os.environ.get("SKIP_USERFILE_THRESHOLD", "").lower() == "true"
+)
+
+# Comma-separated list of specific tenant IDs to skip threshold (multi-tenant only)
+SKIP_USERFILE_THRESHOLD_TENANT_IDS = os.environ.get(
+    "SKIP_USERFILE_THRESHOLD_TENANT_IDS"
+)
+SKIP_USERFILE_THRESHOLD_TENANT_LIST = (
+    [
+        tenant.strip()
+        for tenant in SKIP_USERFILE_THRESHOLD_TENANT_IDS.split(",")
+        if tenant.strip()
+    ]
+    if SKIP_USERFILE_THRESHOLD_TENANT_IDS
+    else None
+)
+
 ENVIRONMENT = os.environ.get("ENVIRONMENT") or "not_explicitly_set"


--- a/backend/tests/daily/connectors/slack/test_slack_perm_sync.py
+++ b/backend/tests/daily/connectors/slack/test_slack_perm_sync.py
@@ -1,6 +1,4 @@
 import time
-from datetime import datetime
-from datetime import timezone

 import pytest

@@ -19,10 +17,6 @@ PRIVATE_CHANNEL_USERS = [
    "test_user_2@onyx-test.com",
 ]

-# Predates any test workspace messages, so the result set should match
-# the "no start time" case while exercising the oldest= parameter.
-OLDEST_TS_2016 = datetime(2016, 1, 1, tzinfo=timezone.utc).timestamp()
-
 pytestmark = pytest.mark.usefixtures("enable_ee")


@@ -111,17 +105,15 @@ def test_load_from_checkpoint_access__private_channel(
    ],
    indirect=True,
 )
-@pytest.mark.parametrize("start_ts", [None, OLDEST_TS_2016])
 def test_slim_documents_access__public_channel(
    slack_connector: SlackConnector,
-    start_ts: float | None,
 ) -> None:
    """Test that retrieve_all_slim_docs_perm_sync returns correct access information for slim documents."""
    if not slack_connector.client:
        raise RuntimeError("Web client must be defined")

    slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
-        start=start_ts,
+        start=0.0,
        end=time.time(),
    )

@@ -157,7 +149,7 @@ def test_slim_documents_access__private_channel(
        raise RuntimeError("Web client must be defined")

    slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
-        start=None,
+        start=0.0,
        end=time.time(),
    )

--- a/backend/tests/external_dependency_unit/answer/stream_test_utils.py
+++ b/backend/tests/external_dependency_unit/answer/stream_test_utils.py
@@ -27,13 +27,11 @@ def create_placement(
    turn_index: int,
    tab_index: int = 0,
    sub_turn_index: int | None = None,
-    model_index: int | None = 0,
 ) -> Placement:
    return Placement(
        turn_index=turn_index,
        tab_index=tab_index,
        sub_turn_index=sub_turn_index,
-        model_index=model_index,
    )


--- a/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
@@ -1,7 +1,7 @@
 """
 External dependency unit tests for UserFileIndexingAdapter metadata writing.

-Validates that prepare_enrichment produces DocMetadataAwareIndexChunk
+Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
 objects with both `user_project` and `personas` fields populated correctly
 based on actual DB associations.

@@ -127,7 +127,7 @@ def _make_index_chunk(user_file: UserFile) -> IndexChunk:


 class TestAdapterWritesBothMetadataFields:
-    """prepare_enrichment must populate user_project AND personas."""
+    """build_metadata_aware_chunks must populate user_project AND personas."""

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
@@ -153,13 +153,15 @@ class TestAdapterWritesBothMetadataFields:
        doc = chunk.source_document
        context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        assert len(result.chunks) == 1
+        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert aware_chunk.user_project == []

@@ -188,13 +190,15 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        assert len(result.chunks) == 1
+        aware_chunk = result.chunks[0]
        assert project.id in aware_chunk.user_project
        assert aware_chunk.personas == []

@@ -225,13 +229,14 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert project.id in aware_chunk.user_project

@@ -256,13 +261,14 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        aware_chunk = result.chunks[0]
        assert aware_chunk.personas == []
        assert aware_chunk.user_project == []

@@ -294,11 +300,12 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        aware_chunk = result.chunks[0]
        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}
--- a/backend/tests/external_dependency_unit/document_index/test_document_index.py
+++ b/backend/tests/external_dependency_unit/document_index/test_document_index.py
@@ -6,7 +6,6 @@ These tests assume Vespa and OpenSearch are running.
 import time
 import uuid
 from collections.abc import Generator
-from collections.abc import Iterator

 import httpx
 import pytest
@@ -22,7 +21,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
 )
 from onyx.document_index.vespa.index import VespaIndex
 from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
-from onyx.indexing.models import DocMetadataAwareIndexChunk
 from tests.external_dependency_unit.constants import TEST_TENANT_ID
 from tests.external_dependency_unit.document_index.conftest import EMBEDDING_DIM
 from tests.external_dependency_unit.document_index.conftest import make_chunk
@@ -203,25 +201,3 @@ class TestDocumentIndexNew:
            assert len(result_map) == 2
            assert result_map[existing_doc] is True
            assert result_map[new_doc] is False
-
-    def test_index_accepts_generator(
-        self,
-        document_indices: list[DocumentIndexNew],
-        tenant_context: None,  # noqa: ARG002
-    ) -> None:
-        """index() accepts a generator (any iterable), not just a list."""
-        for document_index in document_indices:
-            doc_id = f"test_gen_{uuid.uuid4().hex[:8]}"
-            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[3])
-
-            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
-                for i in range(3):
-                    yield make_chunk(doc_id, chunk_id=i)
-
-            results = document_index.index(
-                chunks=chunk_gen(), indexing_metadata=metadata
-            )
-
-            assert len(results) == 1
-            assert results[0].document_id == doc_id
-            assert results[0].already_existed is False
--- a/backend/tests/external_dependency_unit/document_index/test_document_index_old.py
+++ b/backend/tests/external_dependency_unit/document_index/test_document_index_old.py
@@ -5,7 +5,6 @@ These tests assume Vespa and OpenSearch are running.

 import time
 from collections.abc import Generator
-from collections.abc import Iterator

 import pytest

@@ -167,29 +166,3 @@ class TestDocumentIndexOld:
                batch_retrieval=True,
            )
            assert len(inference_chunks) == 0
-
-    def test_index_accepts_generator(
-        self,
-        document_indices: list[DocumentIndex],
-        tenant_context: None,  # noqa: ARG002
-    ) -> None:
-        """index() accepts a generator (any iterable), not just a list."""
-        for document_index in document_indices:
-
-            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
-                for i in range(3):
-                    yield make_chunk("test_doc_gen", chunk_id=i)
-
-            index_batch_params = IndexBatchParams(
-                doc_id_to_previous_chunk_cnt={"test_doc_gen": 0},
-                doc_id_to_new_chunk_cnt={"test_doc_gen": 3},
-                tenant_id=get_current_tenant_id(),
-                large_chunks_enabled=False,
-            )
-
-            results = document_index.index(chunk_gen(), index_batch_params)
-
-            assert len(results) == 1
-            record = results.pop()
-            assert record.document_id == "test_doc_gen"
-            assert record.already_existed is False
--- a/backend/tests/external_dependency_unit/tools/test_mcp_passthrough_oauth.py
+++ b/backend/tests/external_dependency_unit/tools/test_mcp_passthrough_oauth.py
@@ -13,7 +13,6 @@ This test:
 All external HTTP calls are mocked, but Postgres and Redis are running.
 """

-import queue
 from typing import Any
 from unittest.mock import patch
 from uuid import uuid4
@@ -21,7 +20,7 @@ from uuid import uuid4
 import pytest
 from sqlalchemy.orm import Session

-from onyx.chat.emitter import Emitter
+from onyx.chat.emitter import get_default_emitter
 from onyx.db.enums import MCPAuthenticationPerformer
 from onyx.db.enums import MCPAuthenticationType
 from onyx.db.enums import MCPTransport
@@ -138,7 +137,7 @@ class TestMCPPassThroughOAuth:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=search_tool_config,
@@ -201,7 +200,7 @@ class TestMCPPassThroughOAuth:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
@@ -276,7 +275,7 @@ class TestMCPPassThroughOAuth:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
@@ -351,7 +350,7 @@ class TestMCPPassThroughOAuth:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
@@ -459,7 +458,7 @@ class TestMCPPassThroughOAuth:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
@@ -542,7 +541,7 @@ class TestMCPPassThroughOAuth:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=SearchToolConfig(),
--- a/backend/tests/external_dependency_unit/tools/test_oauth_tool_integration.py
+++ b/backend/tests/external_dependency_unit/tools/test_oauth_tool_integration.py
@@ -8,7 +8,6 @@ Tests the priority logic for OAuth tokens when constructing custom tools:
 All external HTTP calls are mocked, but Postgres and Redis are running.
 """

-import queue
 from typing import Any
 from unittest.mock import Mock
 from unittest.mock import patch
@@ -17,7 +16,7 @@ from uuid import uuid4
 import pytest
 from sqlalchemy.orm import Session

-from onyx.chat.emitter import Emitter
+from onyx.chat.emitter import get_default_emitter
 from onyx.db.models import OAuthAccount
 from onyx.db.models import OAuthConfig
 from onyx.db.models import Persona
@@ -175,7 +174,7 @@ class TestOAuthToolIntegrationPriority:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
            search_tool_config=search_tool_config,
@@ -233,7 +232,7 @@ class TestOAuthToolIntegrationPriority:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
        )
@@ -285,7 +284,7 @@ class TestOAuthToolIntegrationPriority:
            tool_dict = construct_tools(
                persona=persona,
                db_session=db_session,
-                emitter=Emitter(merged_queue=queue.Queue()),
+                emitter=get_default_emitter(),
                user=user,
                llm=llm,
            )
@@ -346,7 +345,7 @@ class TestOAuthToolIntegrationPriority:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
        )
@@ -417,7 +416,7 @@ class TestOAuthToolIntegrationPriority:
            tool_dict = construct_tools(
                persona=persona,
                db_session=db_session,
-                emitter=Emitter(merged_queue=queue.Queue()),
+                emitter=get_default_emitter(),
                user=user,
                llm=llm,
            )
@@ -484,7 +483,7 @@ class TestOAuthToolIntegrationPriority:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
        )
@@ -537,7 +536,7 @@ class TestOAuthToolIntegrationPriority:
        tool_dict = construct_tools(
            persona=persona,
            db_session=db_session,
-            emitter=Emitter(merged_queue=queue.Queue()),
+            emitter=get_default_emitter(),
            user=user,
            llm=llm,
        )
--- a/backend/tests/unit/federated_connector/test_reject_masked_credentials.py
+++ b/backend/tests/unit/federated_connector/test_reject_masked_credentials.py
@@ -0,0 +1,58 @@
+import pytest
+
+from onyx.configs.constants import MASK_CREDENTIAL_CHAR
+from onyx.db.federated import _reject_masked_credentials
+
+
+class TestRejectMaskedCredentials:
+    """Verify that masked credential values are never accepted for DB writes.
+
+    mask_string() has two output formats:
+    - Short strings (< 14 chars): "••••••••••••" (U+2022 BULLET)
+    - Long strings (>= 14 chars): "abcd...wxyz" (first4 + "..." + last4)
+    _reject_masked_credentials must catch both.
+    """
+
+    def test_rejects_fully_masked_value(self) -> None:
+        masked = MASK_CREDENTIAL_CHAR * 12  # "••••••••••••"
+        with pytest.raises(ValueError, match="masked placeholder"):
+            _reject_masked_credentials({"client_id": masked})
+
+    def test_rejects_long_string_masked_value(self) -> None:
+        """mask_string returns 'first4...last4' for long strings — the real
+        format used for OAuth credentials like client_id and client_secret."""
+        with pytest.raises(ValueError, match="masked placeholder"):
+            _reject_masked_credentials({"client_id": "1234...7890"})
+
+    def test_rejects_when_any_field_is_masked(self) -> None:
+        """Even if client_id is real, a masked client_secret must be caught."""
+        with pytest.raises(ValueError, match="client_secret"):
+            _reject_masked_credentials(
+                {
+                    "client_id": "1234567890.1234567890",
+                    "client_secret": MASK_CREDENTIAL_CHAR * 12,
+                }
+            )
+
+    def test_accepts_real_credentials(self) -> None:
+        # Should not raise
+        _reject_masked_credentials(
+            {
+                "client_id": "1234567890.1234567890",
+                "client_secret": "test_client_secret_value",
+            }
+        )
+
+    def test_accepts_empty_dict(self) -> None:
+        # Should not raise — empty credentials are handled elsewhere
+        _reject_masked_credentials({})
+
+    def test_ignores_non_string_values(self) -> None:
+        # Non-string values (None, bool, int) should pass through
+        _reject_masked_credentials(
+            {
+                "client_id": "real_value",
+                "redirect_uri": None,
+                "some_flag": True,
+            }
+        )
--- a/backend/tests/unit/onyx/chat/test_emitter.py
+++ b/backend/tests/unit/onyx/chat/test_emitter.py
@@ -1,173 +0,0 @@
-"""Unit tests for the Emitter class.
-
-All tests use the streaming mode (merged_queue required). Emitter has a single
-code path — no standalone bus.
-"""
-
-import queue
-
-from onyx.chat.emitter import Emitter
-from onyx.server.query_and_chat.placement import Placement
-from onyx.server.query_and_chat.streaming_models import OverallStop
-from onyx.server.query_and_chat.streaming_models import Packet
-from onyx.server.query_and_chat.streaming_models import ReasoningStart
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _placement(
-    turn_index: int = 0,
-    tab_index: int = 0,
-    sub_turn_index: int | None = None,
-) -> Placement:
-    return Placement(
-        turn_index=turn_index,
-        tab_index=tab_index,
-        sub_turn_index=sub_turn_index,
-    )
-
-
-def _packet(
-    turn_index: int = 0,
-    tab_index: int = 0,
-    sub_turn_index: int | None = None,
-) -> Packet:
-    """Build a minimal valid packet with an OverallStop payload."""
-    return Packet(
-        placement=_placement(turn_index, tab_index, sub_turn_index),
-        obj=OverallStop(stop_reason="test"),
-    )
-
-
-def _make_emitter(model_idx: int = 0) -> tuple["Emitter", "queue.Queue"]:
-    """Return (emitter, queue) wired together."""
-    mq: queue.Queue = queue.Queue()
-    return Emitter(merged_queue=mq, model_idx=model_idx), mq
-
-
-# ---------------------------------------------------------------------------
-# Queue routing
-# ---------------------------------------------------------------------------
-
-
-class TestEmitterQueueRouting:
-    def test_emit_lands_on_merged_queue(self) -> None:
-        emitter, mq = _make_emitter()
-        emitter.emit(_packet())
-        assert not mq.empty()
-
-    def test_queue_item_is_tuple_of_key_and_packet(self) -> None:
-        emitter, mq = _make_emitter(model_idx=1)
-        emitter.emit(_packet())
-        item = mq.get_nowait()
-        assert isinstance(item, tuple)
-        assert len(item) == 2
-
-    def test_multiple_packets_delivered_fifo(self) -> None:
-        emitter, mq = _make_emitter()
-        p1 = _packet(turn_index=0)
-        p2 = _packet(turn_index=1)
-        emitter.emit(p1)
-        emitter.emit(p2)
-        _, t1 = mq.get_nowait()
-        _, t2 = mq.get_nowait()
-        assert t1.placement.turn_index == 0
-        assert t2.placement.turn_index == 1
-
-
-# ---------------------------------------------------------------------------
-# model_index tagging
-# ---------------------------------------------------------------------------
-
-
-class TestEmitterModelIndexTagging:
-    def test_n1_default_model_idx_tags_model_index_zero(self) -> None:
-        """N=1: default model_idx=0, so packet gets model_index=0."""
-        emitter, mq = _make_emitter(model_idx=0)
-        emitter.emit(_packet())
-        _key, tagged = mq.get_nowait()
-        assert tagged.placement.model_index == 0
-
-    def test_model_idx_one_tags_packet(self) -> None:
-        emitter, mq = _make_emitter(model_idx=1)
-        emitter.emit(_packet())
-        _key, tagged = mq.get_nowait()
-        assert tagged.placement.model_index == 1
-
-    def test_model_idx_two_tags_packet(self) -> None:
-        """Boundary: third model in a 3-model run."""
-        emitter, mq = _make_emitter(model_idx=2)
-        emitter.emit(_packet())
-        _key, tagged = mq.get_nowait()
-        assert tagged.placement.model_index == 2
-
-
-# ---------------------------------------------------------------------------
-# Queue key
-# ---------------------------------------------------------------------------
-
-
-class TestEmitterQueueKey:
-    def test_key_equals_model_idx(self) -> None:
-        """Drain loop uses the key to route packets; it must match model_idx."""
-        emitter, mq = _make_emitter(model_idx=2)
-        emitter.emit(_packet())
-        key, _ = mq.get_nowait()
-        assert key == 2
-
-    def test_n1_key_is_zero(self) -> None:
-        emitter, mq = _make_emitter(model_idx=0)
-        emitter.emit(_packet())
-        key, _ = mq.get_nowait()
-        assert key == 0
-
-
-# ---------------------------------------------------------------------------
-# Placement field preservation
-# ---------------------------------------------------------------------------
-
-
-class TestEmitterPlacementPreservation:
-    def test_turn_index_is_preserved(self) -> None:
-        emitter, mq = _make_emitter()
-        emitter.emit(_packet(turn_index=5))
-        _, tagged = mq.get_nowait()
-        assert tagged.placement.turn_index == 5
-
-    def test_tab_index_is_preserved(self) -> None:
-        emitter, mq = _make_emitter()
-        emitter.emit(_packet(tab_index=3))
-        _, tagged = mq.get_nowait()
-        assert tagged.placement.tab_index == 3
-
-    def test_sub_turn_index_is_preserved(self) -> None:
-        emitter, mq = _make_emitter()
-        emitter.emit(_packet(sub_turn_index=2))
-        _, tagged = mq.get_nowait()
-        assert tagged.placement.sub_turn_index == 2
-
-    def test_sub_turn_index_none_is_preserved(self) -> None:
-        emitter, mq = _make_emitter()
-        emitter.emit(_packet(sub_turn_index=None))
-        _, tagged = mq.get_nowait()
-        assert tagged.placement.sub_turn_index is None
-
-    def test_packet_obj_is_not_modified(self) -> None:
-        """The payload object must survive tagging untouched."""
-        emitter, mq = _make_emitter()
-        original_obj = OverallStop(stop_reason="sentinel")
-        pkt = Packet(placement=_placement(), obj=original_obj)
-        emitter.emit(pkt)
-        _, tagged = mq.get_nowait()
-        assert tagged.obj is original_obj
-
-    def test_different_obj_types_are_handled(self) -> None:
-        """Any valid PacketObj type passes through correctly."""
-        emitter, mq = _make_emitter()
-        pkt = Packet(placement=_placement(), obj=ReasoningStart())
-        emitter.emit(pkt)
-        _, tagged = mq.get_nowait()
-        assert isinstance(tagged.obj, ReasoningStart)
--- a/backend/tests/unit/onyx/chat/test_multi_model_streaming.py
+++ b/backend/tests/unit/onyx/chat/test_multi_model_streaming.py
@@ -1,768 +0,0 @@
-"""Unit tests for multi-model streaming validation and DB helpers.
-
-These are pure unit tests — no real database or LLM calls required.
-The validation logic in handle_multi_model_stream fires before any external
-calls, so we can trigger it with lightweight mocks.
-"""
-
-import time
-from collections.abc import Generator
-from typing import Any
-from typing import cast
-from unittest.mock import MagicMock
-from unittest.mock import patch
-from uuid import uuid4
-
-import pytest
-
-from onyx.chat.models import StreamingError
-from onyx.configs.constants import MessageType
-from onyx.db.chat import set_preferred_response
-from onyx.llm.override_models import LLMOverride
-from onyx.server.query_and_chat.models import SendMessageRequest
-from onyx.server.query_and_chat.placement import Placement
-from onyx.server.query_and_chat.streaming_models import OverallStop
-from onyx.server.query_and_chat.streaming_models import Packet
-from onyx.server.query_and_chat.streaming_models import ReasoningStart
-from onyx.utils.variable_functionality import global_version
-
-
-@pytest.fixture(autouse=True)
-def _restore_ee_version() -> Generator[None, None, None]:
-    """Reset EE global state after each test.
-
-    Importing onyx.chat.process_message triggers set_is_ee_based_on_env_variable()
-    (via the celery client import chain).  Without this fixture, the EE flag stays
-    True for the rest of the session and breaks unrelated tests that mock Confluence
-    or other connectors and assume EE is disabled.
-    """
-    original = global_version._is_ee
-    yield
-    global_version._is_ee = original
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _make_request(**kwargs: Any) -> SendMessageRequest:
-    defaults: dict[str, Any] = {
-        "message": "hello",
-        "chat_session_id": uuid4(),
-    }
-    defaults.update(kwargs)
-    return SendMessageRequest(**defaults)
-
-
-def _make_override(provider: str = "openai", version: str = "gpt-4") -> LLMOverride:
-    return LLMOverride(model_provider=provider, model_version=version)
-
-
-def _first_from_stream(req: SendMessageRequest, overrides: list[LLMOverride]) -> Any:
-    """Return the first item yielded by handle_multi_model_stream."""
-    from onyx.chat.process_message import handle_multi_model_stream
-
-    user = MagicMock()
-    user.is_anonymous = False
-    user.email = "test@example.com"
-    db = MagicMock()
-
-    gen = handle_multi_model_stream(req, user, db, overrides)
-    return next(gen)
-
-
-# ---------------------------------------------------------------------------
-# handle_multi_model_stream — validation
-# ---------------------------------------------------------------------------
-
-
-class TestRunMultiModelStreamValidation:
-    def test_single_override_yields_error(self) -> None:
-        """Exactly 1 override is not multi-model — yields StreamingError."""
-        req = _make_request()
-        result = _first_from_stream(req, [_make_override()])
-        assert isinstance(result, StreamingError)
-        assert "2-3" in result.error
-
-    def test_four_overrides_yields_error(self) -> None:
-        """4 overrides exceeds maximum — yields StreamingError."""
-        req = _make_request()
-        result = _first_from_stream(
-            req,
-            [
-                _make_override("openai", "gpt-4"),
-                _make_override("anthropic", "claude-3"),
-                _make_override("google", "gemini-pro"),
-                _make_override("cohere", "command-r"),
-            ],
-        )
-        assert isinstance(result, StreamingError)
-        assert "2-3" in result.error
-
-    def test_zero_overrides_yields_error(self) -> None:
-        """Empty override list yields StreamingError."""
-        req = _make_request()
-        result = _first_from_stream(req, [])
-        assert isinstance(result, StreamingError)
-        assert "2-3" in result.error
-
-    def test_deep_research_yields_error(self) -> None:
-        """deep_research=True is incompatible with multi-model — yields StreamingError."""
-        req = _make_request(deep_research=True)
-        result = _first_from_stream(
-            req, [_make_override(), _make_override("anthropic", "claude-3")]
-        )
-        assert isinstance(result, StreamingError)
-        assert "not supported" in result.error
-
-    def test_exactly_two_overrides_is_minimum(self) -> None:
-        """Boundary: 1 override yields error, 2 overrides passes validation."""
-        req = _make_request()
-        # 1 override must yield a StreamingError
-        result = _first_from_stream(req, [_make_override()])
-        assert isinstance(
-            result, StreamingError
-        ), "1 override should yield StreamingError"
-        # 2 overrides must NOT yield a validation StreamingError (may raise later due to
-        # missing session, that's OK — validation itself passed)
-        try:
-            result2 = _first_from_stream(
-                req, [_make_override(), _make_override("anthropic", "claude-3")]
-            )
-            if isinstance(result2, StreamingError) and "2-3" in result2.error:
-                pytest.fail(
-                    f"2 overrides should pass validation, got StreamingError: {result2.error}"
-                )
-        except Exception:
-            pass  # Any non-validation error means validation passed
-
-
-# ---------------------------------------------------------------------------
-# set_preferred_response — validation (mocked db)
-# ---------------------------------------------------------------------------
-
-
-class TestSetPreferredResponseValidation:
-    def test_user_message_not_found(self) -> None:
-        db = MagicMock()
-        db.get.return_value = None
-
-        with pytest.raises(ValueError, match="not found"):
-            set_preferred_response(
-                db, user_message_id=999, preferred_assistant_message_id=1
-            )
-
-    def test_wrong_message_type(self) -> None:
-        """Cannot set preferred response on a non-USER message."""
-        db = MagicMock()
-        user_msg = MagicMock()
-        user_msg.message_type = MessageType.ASSISTANT  # wrong type
-
-        db.get.return_value = user_msg
-
-        with pytest.raises(ValueError, match="not a user message"):
-            set_preferred_response(
-                db, user_message_id=1, preferred_assistant_message_id=2
-            )
-
-    def test_assistant_message_not_found(self) -> None:
-        db = MagicMock()
-        user_msg = MagicMock()
-        user_msg.message_type = MessageType.USER
-
-        # First call returns user_msg, second call (for assistant) returns None
-        db.get.side_effect = [user_msg, None]
-
-        with pytest.raises(ValueError, match="not found"):
-            set_preferred_response(
-                db, user_message_id=1, preferred_assistant_message_id=2
-            )
-
-    def test_assistant_not_child_of_user(self) -> None:
-        db = MagicMock()
-        user_msg = MagicMock()
-        user_msg.message_type = MessageType.USER
-
-        assistant_msg = MagicMock()
-        assistant_msg.parent_message_id = 999  # different parent
-
-        db.get.side_effect = [user_msg, assistant_msg]
-
-        with pytest.raises(ValueError, match="not a child"):
-            set_preferred_response(
-                db, user_message_id=1, preferred_assistant_message_id=2
-            )
-
-    def test_valid_call_sets_preferred_response_id(self) -> None:
-        db = MagicMock()
-        user_msg = MagicMock()
-        user_msg.message_type = MessageType.USER
-
-        assistant_msg = MagicMock()
-        assistant_msg.parent_message_id = 1  # correct parent
-
-        db.get.side_effect = [user_msg, assistant_msg]
-
-        set_preferred_response(db, user_message_id=1, preferred_assistant_message_id=2)
-
-        assert user_msg.preferred_response_id == 2
-        assert user_msg.latest_child_message_id == 2
-
-
-# ---------------------------------------------------------------------------
-# LLMOverride — display_name field
-# ---------------------------------------------------------------------------
-
-
-class TestLLMOverrideDisplayName:
-    def test_display_name_defaults_none(self) -> None:
-        override = LLMOverride(model_provider="openai", model_version="gpt-4")
-        assert override.display_name is None
-
-    def test_display_name_set(self) -> None:
-        override = LLMOverride(
-            model_provider="openai",
-            model_version="gpt-4",
-            display_name="GPT-4 Turbo",
-        )
-        assert override.display_name == "GPT-4 Turbo"
-
-    def test_display_name_serializes(self) -> None:
-        override = LLMOverride(
-            model_provider="anthropic",
-            model_version="claude-opus-4-6",
-            display_name="Claude Opus",
-        )
-        d = override.model_dump()
-        assert d["display_name"] == "Claude Opus"
-
-
-# ---------------------------------------------------------------------------
-# _run_models — drain loop behaviour
-# ---------------------------------------------------------------------------
-
-
-def _make_setup(n_models: int = 1) -> MagicMock:
-    """Minimal ChatTurnSetup mock whose fields pass Pydantic validation in _run_model."""
-    setup = MagicMock()
-    setup.llms = [MagicMock() for _ in range(n_models)]
-    setup.model_display_names = [f"model-{i}" for i in range(n_models)]
-    setup.check_is_connected = MagicMock(return_value=True)
-    setup.reserved_messages = [MagicMock() for _ in range(n_models)]
-    setup.reserved_token_count = 100
-    # Fields consumed by SearchToolConfig / CustomToolConfig / FileReaderToolConfig
-    # constructors inside _run_model — must be typed correctly for Pydantic.
-    setup.new_msg_req.deep_research = False
-    setup.new_msg_req.internal_search_filters = None
-    setup.new_msg_req.allowed_tool_ids = None
-    setup.new_msg_req.include_citations = True
-    setup.search_params.project_id_filter = None
-    setup.search_params.persona_id_filter = None
-    setup.bypass_acl = False
-    setup.slack_context = None
-    setup.available_files.user_file_ids = []
-    setup.available_files.chat_file_ids = []
-    setup.forced_tool_id = None
-    setup.simple_chat_history = []
-    setup.chat_session.id = uuid4()
-    setup.user_message.id = None
-    setup.custom_tool_additional_headers = None
-    setup.mcp_headers = None
-    return setup
-
-
-def _run_models_collect(setup: MagicMock) -> list:
-    """Drive _run_models to completion and return all yielded items."""
-    from onyx.chat.process_message import _run_models
-
-    return list(_run_models(setup, MagicMock(), MagicMock()))
-
-
-class TestRunModels:
-    """Tests for the _run_models worker-thread drain loop.
-
-    All external dependencies (LLM, DB, tools) are patched out.  Worker threads
-    still run but return immediately since run_llm_loop is mocked.
-    """
-
-    def test_n1_overall_stop_from_llm_loop_passes_through(self) -> None:
-        """OverallStop emitted by run_llm_loop is passed through the drain loop unchanged."""
-
-        def emit_stop(**kwargs: Any) -> None:
-            kwargs["emitter"].emit(
-                Packet(
-                    placement=Placement(turn_index=0),
-                    obj=OverallStop(stop_reason="complete"),
-                )
-            )
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_stop),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            packets = _run_models_collect(_make_setup(n_models=1))
-
-        stops = [
-            p
-            for p in packets
-            if isinstance(p, Packet) and isinstance(p.obj, OverallStop)
-        ]
-        assert len(stops) == 1
-        stop_obj = stops[0].obj
-        assert isinstance(stop_obj, OverallStop)
-        assert stop_obj.stop_reason == "complete"
-
-    def test_n1_emitted_packet_has_model_index_zero(self) -> None:
-        """Single-model path: model_index is 0 (Emitter defaults model_idx=0)."""
-
-        def emit_one(**kwargs: Any) -> None:
-            kwargs["emitter"].emit(
-                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
-            )
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_one),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            packets = _run_models_collect(_make_setup(n_models=1))
-
-        reasoning = [
-            p
-            for p in packets
-            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)
-        ]
-        assert len(reasoning) == 1
-        assert reasoning[0].placement.model_index == 0
-
-    def test_n2_each_model_packet_tagged_with_its_index(self) -> None:
-        """Multi-model path: packets from model 0 get index=0, model 1 gets index=1."""
-
-        def emit_one(**kwargs: Any) -> None:
-            # _model_idx is set by _run_model based on position in setup.llms
-            emitter = kwargs["emitter"]
-            emitter.emit(
-                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
-            )
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_one),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            packets = _run_models_collect(_make_setup(n_models=2))
-
-        reasoning = [
-            p
-            for p in packets
-            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)
-        ]
-        assert len(reasoning) == 2
-        indices = {p.placement.model_index for p in reasoning}
-        assert indices == {0, 1}
-
-    def test_model_error_yields_streaming_error(self) -> None:
-        """An exception inside a worker thread is surfaced as a StreamingError."""
-
-        def always_fail(**_kwargs: Any) -> None:
-            raise RuntimeError("intentional test failure")
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=always_fail),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            packets = _run_models_collect(_make_setup(n_models=1))
-
-        errors = [p for p in packets if isinstance(p, StreamingError)]
-        assert len(errors) == 1
-        assert errors[0].error_code == "MODEL_ERROR"
-        assert "intentional test failure" in errors[0].error
-
-    def test_one_model_error_does_not_stop_other_models(self) -> None:
-        """A failing model yields StreamingError; the surviving model's packets still arrive."""
-        setup = _make_setup(n_models=2)
-
-        def fail_model_0_succeed_model_1(**kwargs: Any) -> None:
-            if kwargs["llm"] is setup.llms[0]:
-                raise RuntimeError("model 0 failed")
-            kwargs["emitter"].emit(
-                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
-            )
-
-        with (
-            patch(
-                "onyx.chat.process_message.run_llm_loop",
-                side_effect=fail_model_0_succeed_model_1,
-            ),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            packets = _run_models_collect(setup)
-
-        errors = [p for p in packets if isinstance(p, StreamingError)]
-        assert len(errors) == 1
-
-        reasoning = [
-            p
-            for p in packets
-            if isinstance(p, Packet) and isinstance(p.obj, ReasoningStart)
-        ]
-        assert len(reasoning) == 1
-        assert reasoning[0].placement.model_index == 1
-
-    def test_cancellation_yields_user_cancelled_stop(self) -> None:
-        """If check_is_connected returns False, drain loop emits user_cancelled."""
-
-        def slow_llm(**_kwargs: Any) -> None:
-            time.sleep(0.3)  # Outlasts the 50 ms queue-poll interval
-
-        setup = _make_setup(n_models=1)
-        setup.check_is_connected = MagicMock(return_value=False)
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=slow_llm),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            packets = _run_models_collect(setup)
-
-        stops = [
-            p
-            for p in packets
-            if isinstance(p, Packet) and isinstance(p.obj, OverallStop)
-        ]
-        assert any(
-            isinstance(s.obj, OverallStop) and s.obj.stop_reason == "user_cancelled"
-            for s in stops
-        )
-
-    def test_stop_button_calls_completion_for_all_models(self) -> None:
-        """llm_loop_completion_handle must be called for all models when the stop button fires.
-
-        Regression test for the disconnect-cleanup bug: the old
-        run_chat_loop_with_state_containers always called completion_callback in
-        its finally block (even on disconnect) so the DB message was updated from
-        the TERMINATED placeholder to a partial answer.  The new _run_models must
-        replicate this — otherwise the integration test
-        test_send_message_disconnect_and_cleanup fails because the message stays
-        as "Response was terminated prior to completion, try regenerating."
-        """
-
-        def slow_llm(**_kwargs: Any) -> None:
-            time.sleep(0.3)
-
-        setup = _make_setup(n_models=2)
-        setup.check_is_connected = MagicMock(return_value=False)
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=slow_llm),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch(
-                "onyx.chat.process_message.llm_loop_completion_handle"
-            ) as mock_handle,
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            _run_models_collect(setup)
-
-        # Must be called once per model, not zero times
-        assert mock_handle.call_count == 2
-
-    def test_completion_handle_called_for_each_successful_model(self) -> None:
-        """llm_loop_completion_handle must be called once per model that succeeded."""
-        setup = _make_setup(n_models=2)
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop"),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch(
-                "onyx.chat.process_message.llm_loop_completion_handle"
-            ) as mock_handle,
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            _run_models_collect(setup)
-
-        assert mock_handle.call_count == 2
-
-    def test_completion_handle_not_called_for_failed_model(self) -> None:
-        """llm_loop_completion_handle must be skipped for a model that raised."""
-
-        def always_fail(**_kwargs: Any) -> None:
-            raise RuntimeError("fail")
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=always_fail),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch(
-                "onyx.chat.process_message.llm_loop_completion_handle"
-            ) as mock_handle,
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            _run_models_collect(_make_setup(n_models=1))
-
-        mock_handle.assert_not_called()
-
-    def test_http_disconnect_completion_via_generator_exit(self) -> None:
-        """GeneratorExit from HTTP disconnect triggers worker self-completion.
-
-        When the HTTP client closes the connection, Starlette throws GeneratorExit
-        into the stream generator. The finally block sets drain_done (signalling
-        emitters to stop blocking) and calls executor.shutdown(wait=False) so the
-        server thread is never blocked. Worker threads detect drain_done.is_set()
-        after run_llm_loop completes and self-persist the result via
-        llm_loop_completion_handle using their own DB session.
-
-        This is the primary regression for test_send_message_disconnect_and_cleanup:
-        the integration test disconnects mid-stream and expects the DB message to be
-        updated from the TERMINATED placeholder to the real response.
-        """
-        import threading
-
-        # Signals the worker to unblock from run_llm_loop after gen.close() returns.
-        # This guarantees drain_done is set BEFORE the worker returns from run_llm_loop,
-        # so the self-completion path (drain_done.is_set() check) is always taken.
-        disconnect_received = threading.Event()
-        # Set by the llm_loop_completion_handle mock when called.
-        completion_called = threading.Event()
-
-        def emit_then_complete(**kwargs: Any) -> None:
-            """Emit one packet (to give the drain loop a yield point), then block
-            until the main thread signals that gen.close() has been called.  This
-            ensures drain_done is set before we return so model_succeeded is checked
-            against a set drain_done — no race condition.
-            """
-            emitter = kwargs["emitter"]
-            emitter.emit(
-                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
-            )
-            disconnect_received.wait(timeout=5)
-
-        setup = _make_setup(n_models=1)
-        # is_connected() always True — HTTP disconnect does NOT set the Redis stop fence.
-        setup.check_is_connected = MagicMock(return_value=True)
-
-        with (
-            patch(
-                "onyx.chat.process_message.run_llm_loop",
-                side_effect=emit_then_complete,
-            ),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch(
-                "onyx.chat.process_message.llm_loop_completion_handle",
-                side_effect=lambda *_, **__: completion_called.set(),
-            ) as mock_handle,
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            from onyx.chat.process_message import _run_models
-
-            # cast to Generator so .close() is available; _run_models returns
-            # AnswerStream (= Iterator) but the actual object is always a generator.
-            gen = cast(Generator, _run_models(setup, MagicMock(), MagicMock()))
-            # Advance to the first yielded packet — generator suspends at `yield item`.
-            first = next(gen)
-            assert isinstance(first, Packet)
-            # Simulate Starlette closing the stream on HTTP client disconnect.
-            # GeneratorExit is thrown at the `yield item` suspension point.
-            gen.close()
-            # Unblock the worker now that drain_done has been set by gen.close().
-            disconnect_received.set()
-
-            # Worker self-completes asynchronously (executor.shutdown(wait=False)).
-            # Wait here, inside the patch context, so that get_session_with_current_tenant
-            # and llm_loop_completion_handle mocks are still active when the worker calls them.
-            assert completion_called.wait(
-                timeout=5
-            ), "worker must self-complete via drain_done within 5 seconds"
-            assert (
-                mock_handle.call_count == 1
-            ), "completion handle must be called once for the successful model"
-
-    def test_b1_race_disconnect_handler_completes_already_finished_model(self) -> None:
-        """B1 regression: model finishes BEFORE GeneratorExit fires.
-
-        The worker exits _run_model with drain_done.is_set()=False and skips
-        self-completion.  When gen.close() fires afterward, the finally else-branch
-        must detect model_succeeded=True and call llm_loop_completion_handle itself.
-
-        Contrast with test_http_disconnect_completion_via_generator_exit, which
-        tests the opposite ordering (worker finishes AFTER disconnect).
-        """
-        import threading
-        import time
-
-        completion_called = threading.Event()
-
-        def emit_and_return_immediately(**kwargs: Any) -> None:
-            # Emit one packet so the drain loop has something to yield, then return
-            # immediately — no blocking.  The worker will be done in microseconds.
-            kwargs["emitter"].emit(
-                Packet(placement=Placement(turn_index=0), obj=ReasoningStart())
-            )
-
-        setup = _make_setup(n_models=1)
-        setup.check_is_connected = MagicMock(return_value=True)
-
-        with (
-            patch(
-                "onyx.chat.process_message.run_llm_loop",
-                side_effect=emit_and_return_immediately,
-            ),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch(
-                "onyx.chat.process_message.llm_loop_completion_handle",
-                side_effect=lambda *_, **__: completion_called.set(),
-            ) as mock_handle,
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            from onyx.chat.process_message import _run_models
-
-            gen = cast(Generator, _run_models(setup, MagicMock(), MagicMock()))
-            first = next(gen)
-            assert isinstance(first, Packet)
-
-            # Give the worker thread time to finish completely (emit + return +
-            # finally + self-completion check).  It does almost no work, so 100 ms
-            # is far more than enough while still keeping the test fast.
-            time.sleep(0.1)
-
-            # Now close — worker is already done, so else-branch handles completion.
-            gen.close()
-
-            assert completion_called.wait(
-                timeout=5
-            ), "disconnect handler must call completion for a model that already finished"
-            assert mock_handle.call_count == 1, "completion must be called exactly once"
-
-    def test_stop_button_does_not_call_completion_for_errored_model(self) -> None:
-        """B2 regression: stop-button must NOT call completion for an errored model.
-
-        When model 0 raises an exception, its reserved ChatMessage must not be
-        saved with 'stopped by user' — that message is wrong for a model that
-        errored.  llm_loop_completion_handle must only be called for non-errored
-        models when the stop button fires.
-        """
-
-        def fail_model_0(**kwargs: Any) -> None:
-            if kwargs["llm"] is setup.llms[0]:
-                raise RuntimeError("model 0 errored")
-            # Model 1: run forever (stop button fires before it finishes)
-            time.sleep(10)
-
-        setup = _make_setup(n_models=2)
-        # Return False immediately so the stop-button path fires while model 1
-        # is still sleeping (model 0 has already errored by then).
-        setup.check_is_connected = lambda: False
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop", side_effect=fail_model_0),
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch(
-                "onyx.chat.process_message.llm_loop_completion_handle"
-            ) as mock_handle,
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            _run_models_collect(setup)
-
-        # Completion must NOT be called for model 0 (it errored).
-        # It MAY be called for model 1 (still in-flight when stop fired).
-        for call in mock_handle.call_args_list:
-            assert (
-                call.kwargs.get("llm") is not setup.llms[0]
-            ), "llm_loop_completion_handle must not be called for the errored model"
-
-    def test_external_state_container_used_for_model_zero(self) -> None:
-        """When provided, external_state_container is used as state_containers[0]."""
-        from onyx.chat.chat_state import ChatStateContainer
-        from onyx.chat.process_message import _run_models
-
-        external = ChatStateContainer()
-        setup = _make_setup(n_models=1)
-
-        with (
-            patch("onyx.chat.process_message.run_llm_loop") as mock_llm,
-            patch("onyx.chat.process_message.run_deep_research_llm_loop"),
-            patch("onyx.chat.process_message.construct_tools", return_value={}),
-            patch("onyx.chat.process_message.get_session_with_current_tenant"),
-            patch("onyx.chat.process_message.llm_loop_completion_handle"),
-            patch(
-                "onyx.chat.process_message.get_llm_token_counter",
-                return_value=lambda _: 0,
-            ),
-        ):
-            list(
-                _run_models(
-                    setup, MagicMock(), MagicMock(), external_state_container=external
-                )
-            )
-
-        # The state_container kwarg passed to run_llm_loop must be the external one
-        call_kwargs = mock_llm.call_args.kwargs
-        assert call_kwargs["state_container"] is external
--- a/backend/tests/unit/onyx/connectors/jira/test_jira_permission_sync.py
+++ b/backend/tests/unit/onyx/connectors/jira/test_jira_permission_sync.py
@@ -1,5 +1,3 @@
-from datetime import datetime
-from datetime import timezone
 from unittest.mock import MagicMock
 from unittest.mock import patch

@@ -33,7 +31,6 @@ def mock_jira_cc_pair(
        "jira_base_url": jira_base_url,
        "project_key": project_key,
    }
-    mock_cc_pair.connector.indexing_start = None

    return mock_cc_pair

@@ -68,75 +65,3 @@ def test_jira_permission_sync(
            fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
        ):
            print(doc)
-
-
-def test_jira_doc_sync_passes_indexing_start(
-    jira_connector: JiraConnector,
-    mock_jira_cc_pair: MagicMock,
-    mock_fetch_all_existing_docs_fn: MagicMock,
-    mock_fetch_all_existing_docs_ids_fn: MagicMock,
-) -> None:
-    """Verify that generic_doc_sync derives indexing_start from cc_pair
-    and forwards it to retrieve_all_slim_docs_perm_sync."""
-    indexing_start_dt = datetime(2025, 6, 1, tzinfo=timezone.utc)
-    mock_jira_cc_pair.connector.indexing_start = indexing_start_dt
-
-    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
-        mock_build_client.return_value = jira_connector._jira_client
-        assert jira_connector._jira_client is not None
-        jira_connector._jira_client._options = MagicMock()
-        jira_connector._jira_client._options.return_value = {
-            "rest_api_version": JIRA_SERVER_API_VERSION
-        }
-
-        with patch.object(
-            type(jira_connector),
-            "retrieve_all_slim_docs_perm_sync",
-            return_value=iter([]),
-        ) as mock_retrieve:
-            list(
-                jira_doc_sync(
-                    cc_pair=mock_jira_cc_pair,
-                    fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
-                    fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
-                )
-            )
-
-            mock_retrieve.assert_called_once()
-            call_kwargs = mock_retrieve.call_args
-            assert call_kwargs.kwargs["start"] == indexing_start_dt.timestamp()
-
-
-def test_jira_doc_sync_passes_none_when_no_indexing_start(
-    jira_connector: JiraConnector,
-    mock_jira_cc_pair: MagicMock,
-    mock_fetch_all_existing_docs_fn: MagicMock,
-    mock_fetch_all_existing_docs_ids_fn: MagicMock,
-) -> None:
-    """Verify that indexing_start is None when the connector has no indexing_start set."""
-    mock_jira_cc_pair.connector.indexing_start = None
-
-    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
-        mock_build_client.return_value = jira_connector._jira_client
-        assert jira_connector._jira_client is not None
-        jira_connector._jira_client._options = MagicMock()
-        jira_connector._jira_client._options.return_value = {
-            "rest_api_version": JIRA_SERVER_API_VERSION
-        }
-
-        with patch.object(
-            type(jira_connector),
-            "retrieve_all_slim_docs_perm_sync",
-            return_value=iter([]),
-        ) as mock_retrieve:
-            list(
-                jira_doc_sync(
-                    cc_pair=mock_jira_cc_pair,
-                    fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
-                    fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
-                )
-            )
-
-            mock_retrieve.assert_called_once()
-            call_kwargs = mock_retrieve.call_args
-            assert call_kwargs.kwargs["start"] is None
--- a/backend/tests/unit/onyx/context/search/federated/test_build_thread_text.py
+++ b/backend/tests/unit/onyx/context/search/federated/test_build_thread_text.py
@@ -0,0 +1,67 @@
+"""Tests for _build_thread_text function."""
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.context.search.federated.slack_search import _build_thread_text
+
+
+def _make_msg(user: str, text: str, ts: str) -> dict[str, str]:
+    return {"user": user, "text": text, "ts": ts}
+
+
+class TestBuildThreadText:
+    """Verify _build_thread_text includes full thread replies up to cap."""
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_includes_all_replies(self, mock_profiles: MagicMock) -> None:
+        """All replies within cap are included in output."""
+        mock_profiles.return_value = {}
+        messages = [
+            _make_msg("U1", "parent msg", "1000.0"),
+            _make_msg("U2", "reply 1", "1001.0"),
+            _make_msg("U3", "reply 2", "1002.0"),
+            _make_msg("U4", "reply 3", "1003.0"),
+        ]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        assert "parent msg" in result
+        assert "reply 1" in result
+        assert "reply 2" in result
+        assert "reply 3" in result
+        assert "..." not in result
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_non_thread_returns_parent_only(self, mock_profiles: MagicMock) -> None:
+        """Single message (no replies) returns just the parent text."""
+        mock_profiles.return_value = {}
+        messages = [_make_msg("U1", "just a message", "1000.0")]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        assert "just a message" in result
+        assert "Replies:" not in result
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_parent_always_first(self, mock_profiles: MagicMock) -> None:
+        """Thread parent message is always the first line of output."""
+        mock_profiles.return_value = {}
+        messages = [
+            _make_msg("U1", "I am the parent", "1000.0"),
+            _make_msg("U2", "I am a reply", "1001.0"),
+        ]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        parent_pos = result.index("I am the parent")
+        reply_pos = result.index("I am a reply")
+        assert parent_pos < reply_pos
+
+    @patch("onyx.context.search.federated.slack_search.batch_get_user_profiles")
+    def test_user_profiles_resolved(self, mock_profiles: MagicMock) -> None:
+        """User IDs in thread text are replaced with display names."""
+        mock_profiles.return_value = {"U1": "Alice", "U2": "Bob"}
+        messages = [
+            _make_msg("U1", "hello", "1000.0"),
+            _make_msg("U2", "world", "1001.0"),
+        ]
+        result = _build_thread_text(messages, "token", "T123", MagicMock())
+        assert "Alice" in result
+        assert "Bob" in result
+        assert "<@U1>" not in result
+        assert "<@U2>" not in result
--- a/backend/tests/unit/onyx/context/search/federated/test_url_override.py
+++ b/backend/tests/unit/onyx/context/search/federated/test_url_override.py
@@ -0,0 +1,108 @@
+"""Tests for Slack URL parsing and direct thread fetch via URL override."""
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.context.search.federated.models import DirectThreadFetch
+from onyx.context.search.federated.slack_search import _fetch_thread_from_url
+from onyx.context.search.federated.slack_search_utils import extract_slack_message_urls
+
+
+class TestExtractSlackMessageUrls:
+    """Verify URL parsing extracts channel_id and timestamp correctly."""
+
+    def test_standard_url(self) -> None:
+        query = "summarize https://mycompany.slack.com/archives/C097NBWMY8Y/p1775491616524769"
+        results = extract_slack_message_urls(query)
+        assert len(results) == 1
+        assert results[0] == ("C097NBWMY8Y", "1775491616.524769")
+
+    def test_multiple_urls(self) -> None:
+        query = (
+            "compare https://co.slack.com/archives/C111/p1234567890123456 "
+            "and https://co.slack.com/archives/C222/p9876543210987654"
+        )
+        results = extract_slack_message_urls(query)
+        assert len(results) == 2
+        assert results[0] == ("C111", "1234567890.123456")
+        assert results[1] == ("C222", "9876543210.987654")
+
+    def test_no_urls(self) -> None:
+        query = "what happened in #general last week?"
+        results = extract_slack_message_urls(query)
+        assert len(results) == 0
+
+    def test_non_slack_url_ignored(self) -> None:
+        query = "check https://google.com/archives/C111/p1234567890123456"
+        results = extract_slack_message_urls(query)
+        assert len(results) == 0
+
+    def test_timestamp_conversion(self) -> None:
+        """p prefix removed, dot inserted after 10th digit."""
+        query = "https://x.slack.com/archives/CABC123/p1775491616524769"
+        results = extract_slack_message_urls(query)
+        channel_id, ts = results[0]
+        assert channel_id == "CABC123"
+        assert ts == "1775491616.524769"
+        assert not ts.startswith("p")
+        assert "." in ts
+
+
+class TestFetchThreadFromUrl:
+    """Verify _fetch_thread_from_url calls conversations.replies and returns SlackMessage."""
+
+    @patch("onyx.context.search.federated.slack_search._build_thread_text")
+    @patch("onyx.context.search.federated.slack_search.WebClient")
+    def test_successful_fetch(
+        self, mock_webclient_cls: MagicMock, mock_build_thread: MagicMock
+    ) -> None:
+        mock_client = MagicMock()
+        mock_webclient_cls.return_value = mock_client
+
+        # Mock conversations_replies
+        mock_response = MagicMock()
+        mock_response.get.return_value = [
+            {"user": "U1", "text": "parent", "ts": "1775491616.524769"},
+            {"user": "U2", "text": "reply 1", "ts": "1775491617.000000"},
+            {"user": "U3", "text": "reply 2", "ts": "1775491618.000000"},
+        ]
+        mock_client.conversations_replies.return_value = mock_response
+
+        # Mock channel info
+        mock_ch_response = MagicMock()
+        mock_ch_response.get.return_value = {"name": "general"}
+        mock_client.conversations_info.return_value = mock_ch_response
+
+        mock_build_thread.return_value = (
+            "U1: parent\n\nReplies:\n\nU2: reply 1\n\nU3: reply 2"
+        )
+
+        fetch = DirectThreadFetch(
+            channel_id="C097NBWMY8Y", thread_ts="1775491616.524769"
+        )
+        result = _fetch_thread_from_url(fetch, "xoxp-token")
+
+        assert len(result.messages) == 1
+        msg = result.messages[0]
+        assert msg.channel_id == "C097NBWMY8Y"
+        assert msg.thread_id is None  # Prevents double-enrichment
+        assert msg.slack_score == 100000.0
+        assert "parent" in msg.text
+        mock_client.conversations_replies.assert_called_once_with(
+            channel="C097NBWMY8Y", ts="1775491616.524769"
+        )
+
+    @patch("onyx.context.search.federated.slack_search.WebClient")
+    def test_api_error_returns_empty(self, mock_webclient_cls: MagicMock) -> None:
+        from slack_sdk.errors import SlackApiError
+
+        mock_client = MagicMock()
+        mock_webclient_cls.return_value = mock_client
+        mock_client.conversations_replies.side_effect = SlackApiError(
+            message="channel_not_found",
+            response=MagicMock(status_code=404),
+        )
+
+        fetch = DirectThreadFetch(channel_id="CBAD", thread_ts="1234567890.123456")
+        result = _fetch_thread_from_url(fetch, "xoxp-token")
+        assert len(result.messages) == 0
--- a/backend/tests/unit/onyx/db/test_chat_sessions.py
+++ b/backend/tests/unit/onyx/db/test_chat_sessions.py
@@ -0,0 +1,225 @@
+"""Tests for get_chat_sessions_by_user filtering behavior.
+
+Verifies that failed chat sessions (those with only SYSTEM messages) are
+correctly filtered out while preserving recently created sessions, matching
+the behavior specified in PR #7233.
+"""
+
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+from unittest.mock import MagicMock
+from uuid import UUID
+from uuid import uuid4
+
+import pytest
+from sqlalchemy.orm import Session
+
+from onyx.db.chat import get_chat_sessions_by_user
+from onyx.db.models import ChatSession
+
+
+def _make_session(
+    user_id: UUID,
+    time_created: datetime | None = None,
+    time_updated: datetime | None = None,
+    description: str = "",
+) -> MagicMock:
+    """Create a mock ChatSession with the given attributes."""
+    session = MagicMock(spec=ChatSession)
+    session.id = uuid4()
+    session.user_id = user_id
+    session.time_created = time_created or datetime.now(timezone.utc)
+    session.time_updated = time_updated or session.time_created
+    session.description = description
+    session.deleted = False
+    session.onyxbot_flow = False
+    session.project_id = None
+    return session
+
+
+@pytest.fixture
+def user_id() -> UUID:
+    return uuid4()
+
+
+@pytest.fixture
+def old_time() -> datetime:
+    """A timestamp well outside the 5-minute leeway window."""
+    return datetime.now(timezone.utc) - timedelta(hours=1)
+
+
+@pytest.fixture
+def recent_time() -> datetime:
+    """A timestamp within the 5-minute leeway window."""
+    return datetime.now(timezone.utc) - timedelta(minutes=2)
+
+
+class TestGetChatSessionsByUser:
+    """Tests for the failed chat filtering logic in get_chat_sessions_by_user."""
+
+    def test_filters_out_failed_sessions(
+        self, user_id: UUID, old_time: datetime
+    ) -> None:
+        """Sessions with only SYSTEM messages should be excluded."""
+        valid_session = _make_session(user_id, time_created=old_time)
+        failed_session = _make_session(user_id, time_created=old_time)
+
+        db_session = MagicMock(spec=Session)
+
+        # First execute: returns all sessions
+        # Second execute: returns only the valid session's ID (has non-system msgs)
+        mock_result_1 = MagicMock()
+        mock_result_1.scalars.return_value.all.return_value = [
+            valid_session,
+            failed_session,
+        ]
+
+        mock_result_2 = MagicMock()
+        mock_result_2.scalars.return_value.all.return_value = [valid_session.id]
+
+        db_session.execute.side_effect = [mock_result_1, mock_result_2]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        assert len(result) == 1
+        assert result[0].id == valid_session.id
+
+    def test_keeps_recent_sessions_without_messages(
+        self, user_id: UUID, recent_time: datetime
+    ) -> None:
+        """Recently created sessions should be kept even without messages."""
+        recent_session = _make_session(user_id, time_created=recent_time)
+
+        db_session = MagicMock(spec=Session)
+
+        mock_result_1 = MagicMock()
+        mock_result_1.scalars.return_value.all.return_value = [recent_session]
+
+        db_session.execute.side_effect = [mock_result_1]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        assert len(result) == 1
+        assert result[0].id == recent_session.id
+        # Should only have been called once — no second query needed
+        # because the recent session is within the leeway window
+        assert db_session.execute.call_count == 1
+
+    def test_include_failed_chats_skips_filtering(
+        self, user_id: UUID, old_time: datetime
+    ) -> None:
+        """When include_failed_chats=True, no filtering should occur."""
+        session_a = _make_session(user_id, time_created=old_time)
+        session_b = _make_session(user_id, time_created=old_time)
+
+        db_session = MagicMock(spec=Session)
+
+        mock_result = MagicMock()
+        mock_result.scalars.return_value.all.return_value = [session_a, session_b]
+
+        db_session.execute.side_effect = [mock_result]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=True,
+        )
+
+        assert len(result) == 2
+        # Only one DB call — no second query for message validation
+        assert db_session.execute.call_count == 1
+
+    def test_limit_applied_after_filtering(
+        self, user_id: UUID, old_time: datetime
+    ) -> None:
+        """Limit should be applied after filtering, not before."""
+        sessions = [_make_session(user_id, time_created=old_time) for _ in range(5)]
+        valid_ids = [s.id for s in sessions[:3]]
+
+        db_session = MagicMock(spec=Session)
+
+        mock_result_1 = MagicMock()
+        mock_result_1.scalars.return_value.all.return_value = sessions
+
+        mock_result_2 = MagicMock()
+        mock_result_2.scalars.return_value.all.return_value = valid_ids
+
+        db_session.execute.side_effect = [mock_result_1, mock_result_2]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+            limit=2,
+        )
+
+        assert len(result) == 2
+        # Should be the first 2 valid sessions (order preserved)
+        assert result[0].id == sessions[0].id
+        assert result[1].id == sessions[1].id
+
+    def test_mixed_recent_and_old_sessions(
+        self, user_id: UUID, old_time: datetime, recent_time: datetime
+    ) -> None:
+        """Mix of recent and old sessions should filter correctly."""
+        old_valid = _make_session(user_id, time_created=old_time)
+        old_failed = _make_session(user_id, time_created=old_time)
+        recent_no_msgs = _make_session(user_id, time_created=recent_time)
+
+        db_session = MagicMock(spec=Session)
+
+        mock_result_1 = MagicMock()
+        mock_result_1.scalars.return_value.all.return_value = [
+            old_valid,
+            old_failed,
+            recent_no_msgs,
+        ]
+
+        mock_result_2 = MagicMock()
+        mock_result_2.scalars.return_value.all.return_value = [old_valid.id]
+
+        db_session.execute.side_effect = [mock_result_1, mock_result_2]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        result_ids = {cs.id for cs in result}
+        assert old_valid.id in result_ids
+        assert recent_no_msgs.id in result_ids
+        assert old_failed.id not in result_ids
+
+    def test_empty_result(self, user_id: UUID) -> None:
+        """No sessions should return empty list without errors."""
+        db_session = MagicMock(spec=Session)
+
+        mock_result = MagicMock()
+        mock_result.scalars.return_value.all.return_value = []
+
+        db_session.execute.side_effect = [mock_result]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        assert result == []
+        assert db_session.execute.call_count == 1
--- a/backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
+++ b/backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
@@ -1,223 +0,0 @@
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-from onyx.access.models import DocumentAccess
-from onyx.configs.constants import DocumentSource
-from onyx.connectors.models import Document
-from onyx.connectors.models import TextSection
-from onyx.document_index.interfaces_new import IndexingMetadata
-from onyx.document_index.interfaces_new import TenantState
-from onyx.document_index.opensearch.opensearch_document_index import (
-    OpenSearchDocumentIndex,
-)
-from onyx.indexing.models import ChunkEmbedding
-from onyx.indexing.models import DocMetadataAwareIndexChunk
-
-
-def _make_chunk(
-    doc_id: str,
-    chunk_id: int,
-) -> DocMetadataAwareIndexChunk:
-    """Creates a minimal DocMetadataAwareIndexChunk for testing."""
-    doc = Document(
-        id=doc_id,
-        sections=[TextSection(text="test", link="http://test.com")],
-        source=DocumentSource.FILE,
-        semantic_identifier="test_doc",
-        metadata={},
-    )
-    access = DocumentAccess.build(
-        user_emails=[],
-        user_groups=[],
-        external_user_emails=[],
-        external_user_group_ids=[],
-        is_public=True,
-    )
-    return DocMetadataAwareIndexChunk(
-        chunk_id=chunk_id,
-        blurb="test",
-        content="test content",
-        source_links={0: "http://test.com"},
-        image_file_id=None,
-        section_continuation=False,
-        source_document=doc,
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        doc_summary="",
-        chunk_context="",
-        contextual_rag_reserved_tokens=0,
-        embeddings=ChunkEmbedding(full_embedding=[0.1] * 10, mini_chunk_embeddings=[]),
-        title_embedding=[0.1] * 10,
-        tenant_id="test_tenant",
-        access=access,
-        document_sets=set(),
-        user_project=[],
-        personas=[],
-        boost=0,
-        aggregated_chunk_boost_factor=1.0,
-        ancestor_hierarchy_node_ids=[],
-    )
-
-
-def _make_index() -> tuple[OpenSearchDocumentIndex, MagicMock]:
-    """Creates an OpenSearchDocumentIndex with a mocked client.
-    Returns the index and the mock for bulk_index_documents."""
-    mock_client = MagicMock()
-    mock_bulk = MagicMock()
-    mock_client.bulk_index_documents = mock_bulk
-
-    tenant_state = TenantState(tenant_id="test_tenant", multitenant=False)
-
-    index = OpenSearchDocumentIndex.__new__(OpenSearchDocumentIndex)
-    index._index_name = "test_index"
-    index._client = mock_client
-    index._tenant_state = tenant_state
-
-    return index, mock_bulk
-
-
-def _make_metadata(doc_id: str, chunk_count: int) -> IndexingMetadata:
-    return IndexingMetadata(
-        doc_id_to_chunk_cnt_diff={
-            doc_id: IndexingMetadata.ChunkCounts(
-                old_chunk_cnt=0,
-                new_chunk_cnt=chunk_count,
-            ),
-        },
-    )
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_under_batch_limit_flushes_once() -> None:
-    """A document with fewer chunks than MAX_CHUNKS_PER_DOC_BATCH should flush once."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 50
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    assert mock_bulk.call_count == 1
-    batch_arg = mock_bulk.call_args_list[0]
-    assert len(batch_arg.kwargs["documents"]) == num_chunks
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_over_batch_limit_flushes_multiple_times() -> None:
-    """A document with more chunks than MAX_CHUNKS_PER_DOC_BATCH should flush multiple times."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 250
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    # 250 chunks / 100 per batch = 3 flushes (100 + 100 + 50)
-    assert mock_bulk.call_count == 3
-    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
-    assert batch_sizes == [100, 100, 50]
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_exactly_at_batch_limit() -> None:
-    """A document with exactly MAX_CHUNKS_PER_DOC_BATCH chunks should flush once
-    (the flush happens on the next chunk, not at the boundary)."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 100
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    # 100 chunks hit the >= check on chunk 101 which doesn't exist,
-    # so final flush handles all 100
-    # Actually: the elif fires when len(current_chunks) >= 100, which happens
-    # when current_chunks has 100 items and the 101st chunk arrives.
-    # With exactly 100 chunks, the 100th chunk makes len == 99, then appended -> 100.
-    # No 101st chunk arrives, so the final flush handles all 100.
-    assert mock_bulk.call_count == 1
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_one_over_batch_limit() -> None:
-    """101 chunks for one doc: first 100 flushed when the 101st arrives, then
-    the 101st is flushed at the end."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 101
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    assert mock_bulk.call_count == 2
-    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
-    assert batch_sizes == [100, 1]
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_multiple_docs_each_under_limit_flush_per_doc() -> None:
-    """Multiple documents each under the batch limit should flush once per document."""
-    index, mock_bulk = _make_index()
-    chunks = []
-    for doc_idx in range(3):
-        doc_id = f"doc_{doc_idx}"
-        for chunk_idx in range(50):
-            chunks.append(_make_chunk(doc_id, chunk_idx))
-
-    metadata = IndexingMetadata(
-        doc_id_to_chunk_cnt_diff={
-            f"doc_{i}": IndexingMetadata.ChunkCounts(old_chunk_cnt=0, new_chunk_cnt=50)
-            for i in range(3)
-        },
-    )
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    # 3 documents = 3 flushes (one per doc boundary + final)
-    assert mock_bulk.call_count == 3
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_delete_called_once_per_document() -> None:
-    """Even with multiple flushes for a single document, delete should only be
-    called once per document."""
-    index, _mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 250
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0) as mock_delete:
-        index.index(chunks, metadata)
-
-    mock_delete.assert_called_once_with(doc_id, None)
--- a/backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
+++ b/backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
@@ -1,152 +0,0 @@
-"""Unit tests for VespaDocumentIndex.index().
-
-These tests mock all external I/O (HTTP calls, thread pools) and verify
-the streaming logic, ID cleaning/mapping, and DocumentInsertionRecord
-construction.
-"""
-
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-from onyx.access.models import DocumentAccess
-from onyx.configs.constants import DocumentSource
-from onyx.connectors.models import Document
-from onyx.connectors.models import TextSection
-from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
-from onyx.document_index.interfaces_new import IndexingMetadata
-from onyx.document_index.interfaces_new import TenantState
-from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
-from onyx.indexing.models import ChunkEmbedding
-from onyx.indexing.models import DocMetadataAwareIndexChunk
-from onyx.indexing.models import IndexChunk
-
-
-def _make_chunk(
-    doc_id: str,
-    chunk_id: int = 0,
-    content: str = "test content",
-) -> DocMetadataAwareIndexChunk:
-    doc = Document(
-        id=doc_id,
-        semantic_identifier="test_doc",
-        sections=[TextSection(text=content, link=None)],
-        source=DocumentSource.NOT_APPLICABLE,
-        metadata={},
-    )
-    index_chunk = IndexChunk(
-        chunk_id=chunk_id,
-        blurb=content[:50],
-        content=content,
-        source_links=None,
-        image_file_id=None,
-        section_continuation=False,
-        source_document=doc,
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        contextual_rag_reserved_tokens=0,
-        doc_summary="",
-        chunk_context="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        embeddings=ChunkEmbedding(
-            full_embedding=[0.1] * 10,
-            mini_chunk_embeddings=[],
-        ),
-        title_embedding=None,
-    )
-    access = DocumentAccess.build(
-        user_emails=[],
-        user_groups=[],
-        external_user_emails=[],
-        external_user_group_ids=[],
-        is_public=True,
-    )
-    return DocMetadataAwareIndexChunk.from_index_chunk(
-        index_chunk=index_chunk,
-        access=access,
-        document_sets=set(),
-        user_project=[],
-        personas=[],
-        boost=0,
-        aggregated_chunk_boost_factor=1.0,
-        tenant_id="test_tenant",
-    )
-
-
-def _make_indexing_metadata(
-    doc_ids: list[str],
-    old_counts: list[int],
-    new_counts: list[int],
-) -> IndexingMetadata:
-    return IndexingMetadata(
-        doc_id_to_chunk_cnt_diff={
-            doc_id: IndexingMetadata.ChunkCounts(
-                old_chunk_cnt=old,
-                new_chunk_cnt=new,
-            )
-            for doc_id, old, new in zip(doc_ids, old_counts, new_counts)
-        }
-    )
-
-
-def _stub_enrich(
-    doc_id: str,
-    old_chunk_cnt: int,
-) -> EnrichedDocumentIndexingInfo:
-    """Build an EnrichedDocumentIndexingInfo that says 'no chunks to delete'
-    when old_chunk_cnt == 0, or 'has existing chunks' otherwise."""
-    return EnrichedDocumentIndexingInfo(
-        doc_id=doc_id,
-        chunk_start_index=0,
-        old_version=False,
-        chunk_end_index=old_chunk_cnt,
-    )
-
-
-@patch("onyx.document_index.vespa.vespa_document_index.batch_index_vespa_chunks")
-@patch("onyx.document_index.vespa.vespa_document_index.delete_vespa_chunks")
-@patch(
-    "onyx.document_index.vespa.vespa_document_index.get_document_chunk_ids",
-    return_value=[],
-)
-@patch("onyx.document_index.vespa.vespa_document_index._enrich_basic_chunk_info")
-@patch(
-    "onyx.document_index.vespa.vespa_document_index.BATCH_SIZE",
-    3,
-)
-def test_index_respects_batch_size(
-    mock_enrich: MagicMock,
-    mock_get_chunk_ids: MagicMock,  # noqa: ARG001
-    mock_delete: MagicMock,  # noqa: ARG001
-    mock_batch_index: MagicMock,
-) -> None:
-    """When chunks exceed BATCH_SIZE, batch_index_vespa_chunks is called
-    multiple times with correctly sized batches."""
-    mock_enrich.return_value = _stub_enrich("doc1", old_chunk_cnt=0)
-
-    index = VespaDocumentIndex(
-        index_name="test_index",
-        tenant_state=TenantState(tenant_id="test_tenant", multitenant=False),
-        large_chunks_enabled=False,
-        httpx_client=MagicMock(),
-    )
-
-    chunks = [_make_chunk("doc1", chunk_id=i) for i in range(7)]
-    metadata = _make_indexing_metadata(["doc1"], old_counts=[0], new_counts=[7])
-
-    results = index.index(chunks=chunks, indexing_metadata=metadata)
-
-    assert len(results) == 1
-
-    # With BATCH_SIZE=3 and 7 chunks: batches of 3, 3, 1
-    assert mock_batch_index.call_count == 3
-    batch_sizes = [len(c.kwargs["chunks"]) for c in mock_batch_index.call_args_list]
-    assert batch_sizes == [3, 3, 1]
-
-    # Verify all chunks are accounted for and in order
-    all_indexed = [
-        chunk for c in mock_batch_index.call_args_list for chunk in c.kwargs["chunks"]
-    ]
-    assert len(all_indexed) == 7
-    assert [c.chunk_id for c in all_indexed] == list(range(7))
--- a/backend/tests/unit/onyx/file_processing/fixtures/owner_protected.pdf
+++ b/backend/tests/unit/onyx/file_processing/fixtures/owner_protected.pdf
@@ -0,0 +1,76 @@
+%PDF-1.3
+%<25><><EFBFBD><EFBFBD>
+1 0 obj
+<<
+/Producer <1083d595b1>
+>>
+endobj
+2 0 obj
+<<
+/Type /Pages
+/Count 1
+/Kids [ 4 0 R ]
+>>
+endobj
+3 0 obj
+<<
+/Type /Catalog
+/Pages 2 0 R
+>>
+endobj
+4 0 obj
+<<
+/Type /Page
+/Resources <<
+/Font <<
+/F1 <<
+/Type /Font
+/Subtype /Type1
+/BaseFont /Helvetica
+>>
+>>
+>>
+/MediaBox [ 0.0 0.0 200 200 ]
+/Contents 5 0 R
+/Parent 2 0 R
+>>
+endobj
+5 0 obj
+<<
+/Length 42
+>>
+stream
+,N<><6~<7E>)<29><><EFBFBD><EFBFBD><EFBFBD>u<EFBFBD><0C><><EFBFBD>Zc'<27><>>8g<38><67><EFBFBD>n<EFBFBD><6E><EFBFBD><EFBFBD><EFBFBD>9"
+endstream
+endobj
+6 0 obj
+<<
+/V 2
+/R 3
+/Length 128
+/P 4294967292
+/Filter /Standard
+/O <6a340a292629053da84a6d8b19a5d505953b8b3fdac3d2d389fde0e354528d44>
+/U <d6f0dc91c7b9de264a8d708515468e6528bf4e5e4e758a4164004e56fffa0108>
+>>
+endobj
+xref
+0 7
+0000000000 65535 f 
+0000000015 00000 n 
+0000000059 00000 n 
+0000000118 00000 n 
+0000000167 00000 n 
+0000000348 00000 n 
+0000000440 00000 n 
+trailer
+<<
+/Size 7
+/Root 3 0 R
+/Info 1 0 R
+/ID [ <6364336635356135633239323638353039306635656133623165313637366430> <6364336635356135633239323638353039306635656133623165313637366430> ]
+/Encrypt 6 0 R
+>>
+startxref
+655
+%%EOF
--- a/backend/tests/unit/onyx/file_processing/test_pdf.py
+++ b/backend/tests/unit/onyx/file_processing/test_pdf.py
@@ -12,6 +12,10 @@ dependency on pypdf internals (pypdf.generic).
 from io import BytesIO
 from pathlib import Path

+import pytest
+
+from onyx.file_processing import extract_file_text
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.file_processing.extract_file_text import pdf_to_text
 from onyx.file_processing.extract_file_text import read_pdf_file
 from onyx.file_processing.password_validation import is_pdf_protected
@@ -54,6 +58,12 @@ class TestReadPdfFile:
        text, _, _ = read_pdf_file(_load("encrypted.pdf"), pdf_pass="wrong")
        assert text == ""

+    def test_owner_password_only_pdf_extracts_text(self) -> None:
+        """A PDF encrypted with only an owner password (no user password)
+        should still yield its text content. Regression for #9754."""
+        text, _, _ = read_pdf_file(_load("owner_protected.pdf"))
+        assert "Hello World" in text
+
    def test_empty_pdf(self) -> None:
        text, _, _ = read_pdf_file(_load("empty.pdf"))
        assert text.strip() == ""
@@ -90,6 +100,80 @@ class TestReadPdfFile:
        # Returned list is empty when callback is used
        assert images == []

+    def test_image_cap_skips_images_above_limit(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """When the embedded-image cap is exceeded, remaining images are skipped.
+
+        The cap protects the user-file-processing worker from OOMing on PDFs
+        with thousands of embedded images. Setting the cap to 0 should yield
+        zero extracted images even though the fixture has one.
+        """
+        monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
+        _, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
+        assert images == []
+
+    def test_image_cap_at_limit_extracts_up_to_cap(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """A cap >= image count behaves identically to the uncapped path."""
+        monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 100)
+        _, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
+        assert len(images) == 1
+
+    def test_image_cap_with_callback_stops_streaming_at_limit(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """The cap also short-circuits the streaming callback path."""
+        monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
+        collected: list[tuple[bytes, str]] = []
+
+        def callback(data: bytes, name: str) -> None:
+            collected.append((data, name))
+
+        read_pdf_file(
+            _load("with_image.pdf"), extract_images=True, image_callback=callback
+        )
+        assert collected == []
+
+
+# ── count_pdf_embedded_images ────────────────────────────────────────────
+
+
+class TestCountPdfEmbeddedImages:
+    def test_returns_count_for_normal_pdf(self) -> None:
+        assert count_pdf_embedded_images(_load("with_image.pdf"), cap=10) == 1
+
+    def test_short_circuits_above_cap(self) -> None:
+        # with_image.pdf has 1 image. cap=0 means "anything > 0 is over cap" —
+        # function returns on first increment as the over-cap sentinel.
+        assert count_pdf_embedded_images(_load("with_image.pdf"), cap=0) == 1
+
+    def test_returns_zero_for_pdf_without_images(self) -> None:
+        assert count_pdf_embedded_images(_load("simple.pdf"), cap=10) == 0
+
+    def test_returns_zero_for_invalid_pdf(self) -> None:
+        assert count_pdf_embedded_images(BytesIO(b"not a pdf"), cap=10) == 0
+
+    def test_returns_zero_for_password_locked_pdf(self) -> None:
+        # encrypted.pdf has an open password; we can't inspect without it, so
+        # the helper returns 0 — callers rely on the password-protected check
+        # that runs earlier in the upload pipeline.
+        assert count_pdf_embedded_images(_load("encrypted.pdf"), cap=10) == 0
+
+    def test_inspects_owner_password_only_pdf(self) -> None:
+        # owner_protected.pdf is encrypted but has no open password. It should
+        # decrypt with an empty string and count images normally. The fixture
+        # has zero images, so 0 is a real count (not the "bail on encrypted"
+        # path).
+        assert count_pdf_embedded_images(_load("owner_protected.pdf"), cap=10) == 0
+
+    def test_preserves_file_position(self) -> None:
+        pdf = _load("with_image.pdf")
+        pdf.seek(42)
+        count_pdf_embedded_images(pdf, cap=10)
+        assert pdf.tell() == 42
+

 # ── pdf_to_text ──────────────────────────────────────────────────────────

@@ -117,6 +201,12 @@ class TestIsPdfProtected:
    def test_protected_pdf(self) -> None:
        assert is_pdf_protected(_load("encrypted.pdf")) is True

+    def test_owner_password_only_is_not_protected(self) -> None:
+        """A PDF with only an owner password (permission restrictions) but no
+        user password should NOT be considered protected — any viewer can open
+        it without prompting for a password."""
+        assert is_pdf_protected(_load("owner_protected.pdf")) is False
+
    def test_preserves_file_position(self) -> None:
        pdf = _load("simple.pdf")
        pdf.seek(42)
--- a/backend/tests/unit/onyx/file_processing/test_pptx_to_text.py
+++ b/backend/tests/unit/onyx/file_processing/test_pptx_to_text.py
@@ -0,0 +1,79 @@
+import io
+
+from pptx import Presentation  # type: ignore[import-untyped]
+from pptx.chart.data import CategoryChartData  # type: ignore[import-untyped]
+from pptx.enum.chart import XL_CHART_TYPE  # type: ignore[import-untyped]
+from pptx.util import Inches  # type: ignore[import-untyped]
+
+from onyx.file_processing.extract_file_text import pptx_to_text
+
+
+def _make_pptx_with_chart() -> io.BytesIO:
+    """Create an in-memory pptx with one text slide and one chart slide."""
+    prs = Presentation()
+
+    # Slide 1: text only
+    slide1 = prs.slides.add_slide(prs.slide_layouts[1])
+    slide1.shapes.title.text = "Introduction"
+    slide1.placeholders[1].text = "This is the first slide."
+
+    # Slide 2: chart
+    slide2 = prs.slides.add_slide(prs.slide_layouts[5])  # Blank layout
+    chart_data = CategoryChartData()
+    chart_data.categories = ["Q1", "Q2", "Q3"]
+    chart_data.add_series("Revenue", (100, 200, 300))
+    slide2.shapes.add_chart(
+        XL_CHART_TYPE.COLUMN_CLUSTERED,
+        Inches(1),
+        Inches(1),
+        Inches(6),
+        Inches(4),
+        chart_data,
+    )
+
+    buf = io.BytesIO()
+    prs.save(buf)
+    buf.seek(0)
+    return buf
+
+
+def _make_pptx_without_chart() -> io.BytesIO:
+    """Create an in-memory pptx with a single text-only slide."""
+    prs = Presentation()
+    slide = prs.slides.add_slide(prs.slide_layouts[1])
+    slide.shapes.title.text = "Hello World"
+    slide.placeholders[1].text = "Some content here."
+
+    buf = io.BytesIO()
+    prs.save(buf)
+    buf.seek(0)
+    return buf
+
+
+class TestPptxToText:
+    def test_chart_is_omitted(self) -> None:
+        # Precondition
+        pptx_file = _make_pptx_with_chart()
+
+        # Under test
+        result = pptx_to_text(pptx_file)
+
+        # Postcondition
+        assert "Introduction" in result
+        assert "first slide" in result
+        assert "[chart omitted]" in result
+        # The actual chart data should NOT appear in the output.
+        assert "Revenue" not in result
+        assert "Q1" not in result
+
+    def test_text_only_pptx(self) -> None:
+        # Precondition
+        pptx_file = _make_pptx_without_chart()
+
+        # Under test
+        result = pptx_to_text(pptx_file)
+
+        # Postcondition
+        assert "Hello World" in result
+        assert "Some content" in result
+        assert "[chart omitted]" not in result
--- a/backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
+++ b/backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
@@ -1,391 +0,0 @@
-"""Unit tests for _embed_chunks_to_store.
-
-Tests cover:
-  - Single batch, no failures
-  - Multiple batches, no failures
-  - Failure in a single batch
-  - Cross-batch document failure scrubbing
-  - Later batches skip already-failed docs
-  - Empty input
-  - All chunks fail
-"""
-
-from collections.abc import Callable
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-from onyx.connectors.models import ConnectorFailure
-from onyx.connectors.models import Document
-from onyx.connectors.models import DocumentFailure
-from onyx.connectors.models import DocumentSource
-from onyx.connectors.models import TextSection
-from onyx.indexing.chunk_batch_store import ChunkBatchStore
-from onyx.indexing.indexing_pipeline import _embed_chunks_to_store
-from onyx.indexing.models import ChunkEmbedding
-from onyx.indexing.models import DocAwareChunk
-from onyx.indexing.models import IndexChunk
-
-
-def _make_doc(doc_id: str) -> Document:
-    return Document(
-        id=doc_id,
-        semantic_identifier="test",
-        source=DocumentSource.FILE,
-        sections=[TextSection(text="test", link=None)],
-        metadata={},
-    )
-
-
-def _make_chunk(doc_id: str, chunk_id: int) -> DocAwareChunk:
-    return DocAwareChunk(
-        chunk_id=chunk_id,
-        blurb="test",
-        content="test content",
-        source_links=None,
-        image_file_id=None,
-        section_continuation=False,
-        source_document=_make_doc(doc_id),
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        doc_summary="",
-        chunk_context="",
-        contextual_rag_reserved_tokens=0,
-    )
-
-
-def _make_index_chunk(doc_id: str, chunk_id: int) -> IndexChunk:
-    """Create an IndexChunk (a DocAwareChunk with embeddings)."""
-    return IndexChunk(
-        chunk_id=chunk_id,
-        blurb="test",
-        content="test content",
-        source_links=None,
-        image_file_id=None,
-        section_continuation=False,
-        source_document=_make_doc(doc_id),
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        doc_summary="",
-        chunk_context="",
-        contextual_rag_reserved_tokens=0,
-        embeddings=ChunkEmbedding(
-            full_embedding=[0.1] * 10,
-            mini_chunk_embeddings=[],
-        ),
-        title_embedding=None,
-    )
-
-
-def _make_failure(doc_id: str) -> ConnectorFailure:
-    return ConnectorFailure(
-        failed_document=DocumentFailure(document_id=doc_id, document_link=None),
-        failure_message="embedding failed",
-        exception=RuntimeError("embedding failed"),
-    )
-
-
-def _mock_embed_success(
-    chunks: list[DocAwareChunk], **_kwargs: object
-) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-    """Simulate successful embedding of all chunks."""
-    return (
-        [_make_index_chunk(c.source_document.id, c.chunk_id) for c in chunks],
-        [],
-    )
-
-
-def _mock_embed_fail_doc(
-    fail_doc_id: str,
-) -> Callable[..., tuple[list[IndexChunk], list[ConnectorFailure]]]:
-    """Return an embed mock that fails all chunks for a specific doc."""
-
-    def _embed(
-        chunks: list[DocAwareChunk], **_kwargs: object
-    ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-        successes = [
-            _make_index_chunk(c.source_document.id, c.chunk_id)
-            for c in chunks
-            if c.source_document.id != fail_doc_id
-        ]
-        failures = (
-            [_make_failure(fail_doc_id)]
-            if any(c.source_document.id == fail_doc_id for c in chunks)
-            else []
-        )
-        return successes, failures
-
-    return _embed
-
-
-class TestEmbedChunksInBatches:
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
-    def test_single_batch_no_failures(self, mock_embed: MagicMock) -> None:
-        """All chunks fit in one batch and embed successfully."""
-        mock_embed.side_effect = _mock_embed_success
-
-        with ChunkBatchStore() as store:
-            chunks = [_make_chunk("doc1", i) for i in range(3)]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 3
-            assert len(result.connector_failures) == 0
-
-            # Verify stored contents
-            assert len(store._batch_files()) == 1
-            stored = list(store.stream())
-            assert len(stored) == 3
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_multiple_batches_no_failures(self, mock_embed: MagicMock) -> None:
-        """Chunks are split across multiple batches, all succeed."""
-        mock_embed.side_effect = _mock_embed_success
-
-        with ChunkBatchStore() as store:
-            chunks = [_make_chunk("doc1", i) for i in range(7)]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 7
-            assert len(result.connector_failures) == 0
-            assert len(store._batch_files()) == 3  # 3 + 3 + 1
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
-    def test_single_batch_with_failure(self, mock_embed: MagicMock) -> None:
-        """One doc fails embedding, its chunks are excluded from results."""
-        mock_embed.side_effect = _mock_embed_fail_doc("doc2")
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("doc1", 0),
-                _make_chunk("doc2", 1),
-                _make_chunk("doc1", 2),
-            ]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.connector_failures) == 1
-            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
-            assert "doc2" not in successful_doc_ids
-            assert "doc1" in successful_doc_ids
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_cross_batch_failure_scrubs_earlier_batch(
-        self, mock_embed: MagicMock
-    ) -> None:
-        """Doc A spans batches 0 and 1.  It succeeds in batch 0 but fails in
-        batch 1.  Its chunks should be scrubbed from batch 0's batch file."""
-        call_count = 0
-
-        def _embed(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            nonlocal call_count
-            call_count += 1
-            if call_count == 1:
-                return _mock_embed_success(chunks)
-            else:
-                return _mock_embed_fail_doc("docA")(chunks)
-
-        mock_embed.side_effect = _embed
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("docA", 0),
-                _make_chunk("docA", 1),
-                _make_chunk("docA", 2),
-                _make_chunk("docA", 3),
-                _make_chunk("docB", 0),
-                _make_chunk("docB", 1),
-            ]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            # docA should be fully excluded from results
-            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
-            assert "docA" not in successful_doc_ids
-            assert "docB" in successful_doc_ids
-            assert len(result.connector_failures) == 1
-
-            # Verify batch 0 was scrubbed of docA chunks
-            all_stored = list(store.stream())
-            stored_doc_ids = {c.source_document.id for c in all_stored}
-            assert "docA" not in stored_doc_ids
-            assert "docB" in stored_doc_ids
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_later_batch_skips_already_failed_doc(self, mock_embed: MagicMock) -> None:
-        """If docA fails in batch 0, its chunks in batch 1 are skipped
-        entirely (never sent to the embedder)."""
-        embedded_doc_ids: list[str] = []
-
-        def _embed(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            for c in chunks:
-                embedded_doc_ids.append(c.source_document.id)
-            return _mock_embed_fail_doc("docA")(chunks)
-
-        mock_embed.side_effect = _embed
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("docA", 0),
-                _make_chunk("docA", 1),
-                _make_chunk("docA", 2),
-                _make_chunk("docA", 3),
-                _make_chunk("docB", 0),
-                _make_chunk("docB", 1),
-            ]
-            _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-        # docA should only appear in batch 0, not batch 1
-        batch_1_doc_ids = embedded_doc_ids[3:]
-        assert "docA" not in batch_1_doc_ids
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_failed_doc_skipped_in_later_batch_while_other_doc_succeeds(
-        self, mock_embed: MagicMock
-    ) -> None:
-        """doc1 spans batches 0 and 1, doc2 only in batch 1.  Batch 0 fails
-        doc1.  In batch 1, doc1 chunks should be skipped but doc2 chunks
-        should still be embedded successfully."""
-        embedded_chunks: list[list[str]] = []
-
-        def _embed(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            embedded_chunks.append([c.source_document.id for c in chunks])
-            return _mock_embed_fail_doc("doc1")(chunks)
-
-        mock_embed.side_effect = _embed
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("doc1", 0),
-                _make_chunk("doc1", 1),
-                _make_chunk("doc1", 2),
-                _make_chunk("doc1", 3),
-                _make_chunk("doc2", 0),
-                _make_chunk("doc2", 1),
-            ]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            # doc1 should be fully excluded, doc2 fully included
-            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
-            assert "doc1" not in successful_doc_ids
-            assert "doc2" in successful_doc_ids
-            assert len(result.successful_chunk_ids) == 2  # doc2's 2 chunks
-
-            # Batch 1 should only contain doc2 (doc1 was filtered before embedding)
-            assert len(embedded_chunks) == 2
-            assert "doc1" not in embedded_chunks[1]
-            assert embedded_chunks[1] == ["doc2", "doc2"]
-
-            # Verify on-disk state has no doc1 chunks
-            all_stored = list(store.stream())
-            assert all(c.source_document.id == "doc2" for c in all_stored)
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    def test_empty_input(self, mock_embed: MagicMock) -> None:
-        """Empty chunk list produces empty results."""
-        mock_embed.side_effect = _mock_embed_success
-
-        with ChunkBatchStore() as store:
-            result = _embed_chunks_to_store(
-                chunks=[],
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 0
-            assert len(result.connector_failures) == 0
-            mock_embed.assert_not_called()
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
-    def test_all_chunks_fail(self, mock_embed: MagicMock) -> None:
-        """When all documents fail, results have no successful chunks."""
-
-        def _fail_all(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            doc_ids = {c.source_document.id for c in chunks}
-            return [], [_make_failure(doc_id) for doc_id in doc_ids]
-
-        mock_embed.side_effect = _fail_all
-
-        with ChunkBatchStore() as store:
-            chunks = [_make_chunk("doc1", 0), _make_chunk("doc2", 1)]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 0
-            assert len(result.connector_failures) == 2
--- a/backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
+++ b/backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
@@ -116,7 +116,7 @@ def _run_adapter_build(
    project_ids_map: dict[str, list[int]],
    persona_ids_map: dict[str, list[int]],
 ) -> list[DocMetadataAwareIndexChunk]:
-    """Helper that runs UserFileIndexingAdapter.prepare_enrichment + enrich_chunk
+    """Helper that runs UserFileIndexingAdapter.build_metadata_aware_chunks
    with all external dependencies mocked."""
    from onyx.indexing.adapters.user_file_indexing_adapter import (
        UserFileIndexingAdapter,
@@ -155,16 +155,18 @@ def _run_adapter_build(
            side_effect=Exception("no LLM in tests"),
        ),
    ):
-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id="test_tenant",
-            chunks=[chunk],
+            context=context,
        )
-        return [enricher.enrich_chunk(chunk, 1.0)]
+
+    return result.chunks


-def test_prepare_enrichment_includes_persona_ids() -> None:
-    """UserFileIndexingAdapter.prepare_enrichment writes persona IDs
+def test_build_metadata_aware_chunks_includes_persona_ids() -> None:
+    """UserFileIndexingAdapter.build_metadata_aware_chunks writes persona IDs
    fetched from the DB into each chunk's metadata."""
    file_id = str(uuid4())
    persona_ids = [5, 12]
@@ -181,7 +183,7 @@ def test_prepare_enrichment_includes_persona_ids() -> None:
    assert chunks[0].user_project == project_ids


-def test_prepare_enrichment_missing_file_defaults_to_empty() -> None:
+def test_build_metadata_aware_chunks_missing_file_defaults_to_empty() -> None:
    """When a file has no persona or project associations in the DB, the
    adapter should default to empty lists (not KeyError or None)."""
    file_id = str(uuid4())
--- a/backend/tests/unit/onyx/server/scim/test_user_endpoints.py
+++ b/backend/tests/unit/onyx/server/scim/test_user_endpoints.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+from typing import Any
 from unittest.mock import MagicMock
 from unittest.mock import patch
 from uuid import uuid4
@@ -9,7 +10,9 @@ from uuid import uuid4
 from fastapi import Response
 from sqlalchemy.exc import IntegrityError

+from ee.onyx.server.scim.api import _check_seat_availability
 from ee.onyx.server.scim.api import _scim_name_to_str
+from ee.onyx.server.scim.api import _seat_lock_id_for_tenant
 from ee.onyx.server.scim.api import create_user
 from ee.onyx.server.scim.api import delete_user
 from ee.onyx.server.scim.api import get_user
@@ -741,3 +744,80 @@ class TestEmailCasePreservation:
        resource = parse_scim_user(result)
        assert resource.userName == "Alice@Example.COM"
        assert resource.emails[0].value == "Alice@Example.COM"
+
+
+class TestSeatLock:
+    """Tests for the advisory lock in _check_seat_availability."""
+
+    @patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_abc")
+    def test_acquires_advisory_lock_before_checking(
+        self,
+        _mock_tenant: MagicMock,
+        mock_dal: MagicMock,
+    ) -> None:
+        """The advisory lock must be acquired before the seat check runs."""
+        call_order: list[str] = []
+
+        def track_execute(stmt: Any, _params: Any = None) -> None:
+            if "pg_advisory_xact_lock" in str(stmt):
+                call_order.append("lock")
+
+        mock_dal.session.execute.side_effect = track_execute
+
+        with patch(
+            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop"
+        ) as mock_fetch:
+            mock_result = MagicMock()
+            mock_result.available = True
+            mock_fn = MagicMock(return_value=mock_result)
+            mock_fetch.return_value = mock_fn
+
+            def track_check(*_args: Any, **_kwargs: Any) -> Any:
+                call_order.append("check")
+                return mock_result
+
+            mock_fn.side_effect = track_check
+
+            _check_seat_availability(mock_dal)
+
+        assert call_order == ["lock", "check"]
+
+    @patch("ee.onyx.server.scim.api.get_current_tenant_id", return_value="tenant_xyz")
+    def test_lock_uses_tenant_scoped_key(
+        self,
+        _mock_tenant: MagicMock,
+        mock_dal: MagicMock,
+    ) -> None:
+        """The lock id must be derived from the tenant via _seat_lock_id_for_tenant."""
+        mock_result = MagicMock()
+        mock_result.available = True
+        mock_check = MagicMock(return_value=mock_result)
+
+        with patch(
+            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
+            return_value=mock_check,
+        ):
+            _check_seat_availability(mock_dal)
+
+        mock_dal.session.execute.assert_called_once()
+        params = mock_dal.session.execute.call_args[0][1]
+        assert params["lock_id"] == _seat_lock_id_for_tenant("tenant_xyz")
+
+    def test_seat_lock_id_is_stable_and_tenant_scoped(self) -> None:
+        """Lock id must be deterministic and differ across tenants."""
+        assert _seat_lock_id_for_tenant("t1") == _seat_lock_id_for_tenant("t1")
+        assert _seat_lock_id_for_tenant("t1") != _seat_lock_id_for_tenant("t2")
+
+    def test_no_lock_when_ee_absent(
+        self,
+        mock_dal: MagicMock,
+    ) -> None:
+        """No advisory lock should be acquired when the EE check is absent."""
+        with patch(
+            "ee.onyx.server.scim.api.fetch_ee_implementation_or_noop",
+            return_value=None,
+        ):
+            result = _check_seat_availability(mock_dal)
+
+        assert result is None
+        mock_dal.session.execute.assert_not_called()
Show More
Author	SHA1	Message	Date
Justin Tahara	4fa3deedb9	fix(image): Cap Uploaded File Image Count (#10298 )	2026-04-16 21:34:18 -07:00
Nikolas Garza	593ccbcc66	fix(scim): add advisory lock to prevent seat limit race condition (#10048 ) to release v3.1 (#10067 )	2026-04-10 12:43:08 -07:00
Nikolas Garza	9910487f37	feat(federated): full thread replies + direct URL fetch in Slack search (#9940 ) to release v3.1 (#10051 )	2026-04-09 18:24:08 -07:00
Justin Tahara	d158639844	fix(llm): Azure custom model support + Mistral tool call message ordering (#9729 )	2026-04-09 13:58:30 -07:00
Jamison Lahman	6d2bd97412	fix: Custom LLM Provider requires a Provider Name (#10000 )	2026-04-08 10:55:58 -07:00
Jamison Lahman	3d48b6a63e	fix: LM Studio API key field mismatch (#9991 ) to release v3.1 (#9992 ) Co-authored-by: Raunak Bhagat <r@rabh.io>	2026-04-08 10:21:38 -07:00
Jamison Lahman	2a7b7c9187	fix: onboarding LLM Provider configuration fixes (#9972 ) to release v3.1 (#9989 ) Co-authored-by: Raunak Bhagat <r@rabh.io>	2026-04-08 10:07:01 -07:00
github-actions[bot]	c348d1855d	feat: generic OpenAI Compatible LLM Provider setup (#9968 ) to release v3.1 (#9975 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-04-07 13:20:41 -07:00
github-actions[bot]	b4579a1365	fix(indexing, powerpoint files): Patch markitdown _convert_chart_to_markdown to no-op (#9970 ) to release v3.1 (#9979 ) Co-authored-by: acaprau <48705707+acaprau@users.noreply.github.com>	2026-04-07 13:02:34 -07:00
Justin Tahara	893c094aed	fix(groups): Global Curator Permissions (#9974 )	2026-04-07 13:01:38 -07:00
Wenxi	f8a55712d2	fix: set correct ee mode for mcp server (#9933 )	2026-04-07 09:13:23 -07:00
github-actions[bot]	591afd4fb1	fix: stop falsely rejecting owner-password-only PDFs as protected (#9953 ) to release v3.1 (#9962 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-06 21:23:13 -07:00
github-actions[bot]	9328070dc0	fix(federated): prevent masked credentials from corrupting stored secrets (#9868 ) to release v3.1 (#9928 )	2026-04-05 16:18:14 -07:00
Jamison Lahman	6163521126	Revert "chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898 ) to release v3.1" (#9910 )	2026-04-03 18:32:10 -07:00
Jamison Lahman	d42c5616b0	chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898 ) to release v3.1 (#9902 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-04-03 16:18:19 -07:00
Justin Tahara	aeb4fdd6c1	fix(db): remove unnecessary selectinload(User.memories) from auth paths (#9838 )	2026-04-01 17:16:57 -07:00
Nikolas Garza	c673959714	fix(celery): use broker connection pool to prevent Redis connection leak (#9682 )	2026-03-31 18:40:07 -07:00
Justin Tahara	cb36562802	fix(perf): optimize chat sessions query to prevent DB cascading failures (#9802 )	2026-03-31 18:37:38 -07:00
Jessica Singh	efc424bf3e	feat(voice): VAD auto-stop only when auto-send is enabled (#9809 )	2026-03-31 17:46:28 -07:00
Evan Lohn	e0baaf85e5	fix: Anthropic litellm thinking workaround (#9713 )	2026-03-27 14:12:15 -07:00
github-actions[bot]	a0ffd47e2c	chore(playwright): deflake `settings_pages.spec.ts` (#9684 ) to release v3.1 (#9702 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-27 09:08:51 -07:00
Jamison Lahman	d0396a1337	fix(fe): Popover content doesnt overflow on small screens (#9612 ) to release v3.1 (#9700 )	2026-03-27 08:43:53 -07:00