fix(image): Cap Uploaded File Image Count (#10298)

Revert "chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898) to release v3.0" (#9909)
chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898) to release v3.0 (#9903)
2026-04-21 17:36:44 +00:00 · 2026-04-16 21:36:45 -07:00 · 2026-04-03 18:32:06 -07:00 · 2026-04-03 16:02:05 -07:00 · 2026-03-26 11:00:49 -07:00 · 2026-03-26 11:00:49 -07:00
27 changed files with 908 additions and 122 deletions
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -29,20 +29,32 @@ jobs:
      build-backend-craft: ${{ steps.check.outputs.build-backend-craft }}
      build-model-server: ${{ steps.check.outputs.build-model-server }}
      is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
-      is-stable: ${{ steps.check.outputs.is-stable }}
      is-beta: ${{ steps.check.outputs.is-beta }}
-      is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
      is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
-      is-craft-latest: ${{ steps.check.outputs.is-craft-latest }}
+      is-latest: ${{ steps.check.outputs.is-latest }}
      is-test-run: ${{ steps.check.outputs.is-test-run }}
      sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
      short-sha: ${{ steps.check.outputs.short-sha }}
    steps:
+      - name: Checkout (for git tags)
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+          fetch-tags: true
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+        with:
+          version: "0.9.9"
+          enable-cache: false
+
      - name: Check which components to build and version info
        id: check
        env:
          EVENT_NAME: ${{ github.event_name }}
        run: |
+          set -eo pipefail
          TAG="${GITHUB_REF_NAME}"
          # Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
          SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
@@ -54,9 +66,8 @@ jobs:
          IS_VERSION_TAG=false
          IS_STABLE=false
          IS_BETA=false
-          IS_STABLE_STANDALONE=false
          IS_BETA_STANDALONE=false
-          IS_CRAFT_LATEST=false
+          IS_LATEST=false
          IS_PROD_TAG=false
          IS_TEST_RUN=false
          BUILD_DESKTOP=false
@@ -67,9 +78,6 @@ jobs:
          BUILD_MODEL_SERVER=true

          # Determine tag type based on pattern matching (do regex checks once)
-          if [[ "$TAG" == craft-* ]]; then
-            IS_CRAFT_LATEST=true
-          fi
          if [[ "$TAG" == *cloud* ]]; then
            IS_CLOUD=true
          fi
@@ -97,20 +105,28 @@ jobs:
            fi
          fi

-          # Craft-latest builds backend with Craft enabled
-          if [[ "$IS_CRAFT_LATEST" == "true" ]]; then
-            BUILD_BACKEND_CRAFT=true
-            BUILD_BACKEND=false
-          fi
-
          # Standalone version checks (for backend/model-server - version excluding cloud tags)
-          if [[ "$IS_STABLE" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
-            IS_STABLE_STANDALONE=true
-          fi
          if [[ "$IS_BETA" == "true" ]] && [[ "$IS_CLOUD" != "true" ]]; then
            IS_BETA_STANDALONE=true
          fi

+          # Determine if this tag should get the "latest" Docker tag.
+          # Only the highest semver stable tag (vX.Y.Z exactly) gets "latest".
+          if [[ "$IS_STABLE" == "true" ]]; then
+            HIGHEST_STABLE=$(uv run --no-sync --with onyx-devtools ods latest-stable-tag) || {
+              echo "::error::Failed to determine highest stable tag via 'ods latest-stable-tag'"
+              exit 1
+            }
+            if [[ "$TAG" == "$HIGHEST_STABLE" ]]; then
+              IS_LATEST=true
+            fi
+          fi
+
+          # Build craft-latest backend alongside the regular latest.
+          if [[ "$IS_LATEST" == "true" ]]; then
+            BUILD_BACKEND_CRAFT=true
+          fi
+
          # Determine if this is a production tag
          # Production tags are: version tags (v1.2.3*) or nightly tags
          if [[ "$IS_VERSION_TAG" == "true" ]] || [[ "$IS_NIGHTLY" == "true" ]]; then
@@ -129,11 +145,9 @@ jobs:
            echo "build-backend-craft=$BUILD_BACKEND_CRAFT"
            echo "build-model-server=$BUILD_MODEL_SERVER"
            echo "is-cloud-tag=$IS_CLOUD"
-            echo "is-stable=$IS_STABLE"
            echo "is-beta=$IS_BETA"
-            echo "is-stable-standalone=$IS_STABLE_STANDALONE"
            echo "is-beta-standalone=$IS_BETA_STANDALONE"
-            echo "is-craft-latest=$IS_CRAFT_LATEST"
+            echo "is-latest=$IS_LATEST"
            echo "is-test-run=$IS_TEST_RUN"
            echo "sanitized-tag=$SANITIZED_TAG"
            echo "short-sha=$SHORT_SHA"
@@ -600,7 +614,8 @@ jobs:
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
-            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}

@@ -1037,7 +1052,7 @@ jobs:
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
-            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}

@@ -1249,8 +1264,6 @@ jobs:
            latest=false
          tags: |
            type=raw,value=craft-latest
-            # TODO: Consider aligning craft-latest tags with regular backend builds (e.g., latest, edge, beta)
-            # to keep tagging strategy consistent across all backend images

      - name: Create and push manifest
        env:
@@ -1473,7 +1486,8 @@ jobs:
            latest=false
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
-            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}

--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -47,6 +47,8 @@ RUN apt-get update && \
        gcc \
        nano \
        vim \
+        # Install procps so kubernetes exec sessions can use ps aux for debugging
+        procps \
        libjemalloc2 \
        && \
    rm -rf /var/lib/apt/lists/* && \
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -788,6 +788,29 @@ MAX_FILE_SIZE_BYTES = int(
    os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
 )  # 2GB in bytes

+# Maximum embedded images allowed in a single file. PDFs (and other formats)
+# with thousands of embedded images can OOM the user-file-processing worker
+# because every image is decoded with PIL and then sent to the vision LLM.
+# Enforced both at upload time (rejects the file) and during extraction
+# (defense-in-depth: caps the number of images materialized).
+#
+# Clamped to >= 0; a negative value would make upload validation always fail
+# and extraction always stop, which is never desired. 0 is valid but aggressive:
+# no images are extracted, and upload checks reject any PDF that embeds one.
+MAX_EMBEDDED_IMAGES_PER_FILE = max(
+    0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500)
+)
+
+# Maximum embedded images allowed across all files in a single upload batch.
+# Protects against the scenario where a user uploads many files that each
+# fall under MAX_EMBEDDED_IMAGES_PER_FILE but aggregate to enough work
+# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
+# worker under concurrency or run up surprise latency/cost. Also clamped
+# to >= 0.
+MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
+    0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000)
+)
+
 # Use document summary for contextual rag
 USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
 # Use chunk summary for contextual rag
--- a/backend/onyx/db/index_attempt.py
+++ b/backend/onyx/db/index_attempt.py
@@ -583,6 +583,67 @@ def get_latest_index_attempt_for_cc_pair_id(
    return db_session.execute(stmt).scalar_one_or_none()


+def get_latest_successful_index_attempt_for_cc_pair_id(
+    db_session: Session,
+    connector_credential_pair_id: int,
+    secondary_index: bool = False,
+) -> IndexAttempt | None:
+    """Returns the most recent successful (SUCCESS or COMPLETED_WITH_ERRORS) index
+    attempt for the cc pair under PRESENT (FUTURE if secondary_index) search settings.
+    Uses MAX(id) semantics to match get_latest_index_attempts_by_status."""
+    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
+    stmt = (
+        select(IndexAttempt)
+        .where(
+            IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
+            IndexAttempt.status.in_(
+                [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
+            ),
+        )
+        .join(SearchSettings)
+        .where(SearchSettings.status == status)
+        .order_by(desc(IndexAttempt.id))  # highest id first (MAX(id) semantics)
+        .limit(1)
+    )
+    return db_session.execute(stmt).scalar_one_or_none()  # None if nothing matches
+
+
+def get_latest_successful_index_attempts_parallel(
+    secondary_index: bool = False,
+) -> Sequence[IndexAttempt]:
+    """Batch version: returns the latest successful index attempt per cc pair.
+    Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful())."""
+    model_status = (
+        IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
+    )
+    with get_session_with_current_tenant() as db_session:  # opens its own session
+        latest_ids = (  # per cc pair: max id among successful attempts
+            select(
+                IndexAttempt.connector_credential_pair_id,
+                func.max(IndexAttempt.id).label("max_id"),
+            )
+            .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
+            .where(
+                SearchSettings.status == model_status,
+                IndexAttempt.status.in_(
+                    [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
+                ),
+            )
+            .group_by(IndexAttempt.connector_credential_pair_id)
+            .subquery()
+        )
+
+        stmt = select(IndexAttempt).join(  # fetch the full rows for those max ids
+            latest_ids,
+            (
+                IndexAttempt.connector_credential_pair_id
+                == latest_ids.c.connector_credential_pair_id
+            )
+            & (IndexAttempt.id == latest_ids.c.max_id),
+        )
+        return db_session.execute(stmt).scalars().all()
+
+
 def count_index_attempts_for_cc_pair(
    db_session: Session,
    cc_pair_id: int,
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -21,6 +21,7 @@ import chardet
 import openpyxl
 from PIL import Image

+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
 from onyx.configs.constants import ONYX_METADATA_FILENAME
 from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
 from onyx.file_processing.file_types import OnyxFileExtensions
@@ -176,6 +177,56 @@ def read_text_file(
    return file_content_raw, metadata


+def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
+    """Return the number of embedded images in a PDF, stopping once it exceeds cap.
+
+    Used to reject PDFs whose image count would OOM the user-file-processing
+    worker during indexing. Pages are tallied with len(page.images) instead of
+    iterating the images, and counting stops after the first page that pushes
+    the total past cap, so a result > cap is a sentinel. Returns 0 if unparseable.
+
+    Owner-password-only PDFs (permission restrictions but no open password) are
+    counted normally — they decrypt with an empty string. Truly password-locked
+    PDFs are skipped (return 0) since we can't inspect them; the caller should
+    ensure the password-protected check runs first.
+
+    Restores the file pointer to its original position (when tell() worked).
+    """
+    from pypdf import PdfReader
+
+    try:
+        start_pos = file.tell()
+    except Exception:
+        start_pos = None
+    try:
+        if start_pos is not None:
+            file.seek(0)
+        reader = PdfReader(file)
+        if reader.is_encrypted:
+            # Try empty password first (owner-password-only PDFs); give up if that fails.
+            try:
+                if reader.decrypt("") == 0:
+                    return 0
+            except Exception:
+                return 0
+        count = 0
+        for page in reader.pages:
+            # len() avoids materializing each image just to count it.
+            count += len(page.images)
+            if count > cap:
+                return count
+        return count
+    except Exception:
+        logger.warning("Failed to count embedded images in PDF", exc_info=True)
+        return 0
+    finally:
+        if start_pos is not None:
+            try:
+                file.seek(start_pos)
+            except Exception:
+                pass
+
+
 def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
    """
    Extract text from a PDF. For embedded images, a more complex approach is needed.
@@ -231,8 +282,27 @@ def read_pdf_file(
        )

        if extract_images:
+            image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+            images_processed = 0
+            cap_reached = False
            for page_num, page in enumerate(pdf_reader.pages):
+                if cap_reached:
+                    break
                for image_file_object in page.images:
+                    if images_processed >= image_cap:
+                        # Defense-in-depth backstop. Upload-time validation
+                        # should have rejected files exceeding the cap, but
+                        # we also break here so a single oversized file can
+                        # never pin a worker.
+                        logger.warning(
+                            "PDF embedded image cap reached (%d). "
+                            "Skipping remaining images on page %d and beyond.",
+                            image_cap,
+                            page_num + 1,
+                        )
+                        cap_reached = True
+                        break
+
                    image = Image.open(io.BytesIO(image_file_object.data))
                    img_byte_arr = io.BytesIO()
                    image.save(img_byte_arr, format=image.format)
@@ -245,6 +315,7 @@ def read_pdf_file(
                        image_callback(img_bytes, image_name)
                    else:
                        extracted_images.append((img_bytes, image_name))
+                    images_processed += 1

        return text, metadata, extracted_images

--- a/backend/onyx/server/documents/cc_pair.py
+++ b/backend/onyx/server/documents/cc_pair.py
@@ -43,6 +43,9 @@ from onyx.db.index_attempt import count_index_attempt_errors_for_cc_pair
 from onyx.db.index_attempt import count_index_attempts_for_cc_pair
 from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
 from onyx.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
+from onyx.db.index_attempt import (
+    get_latest_successful_index_attempt_for_cc_pair_id,
+)
 from onyx.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id
 from onyx.db.indexing_coordination import IndexingCoordination
 from onyx.db.models import IndexAttempt
@@ -190,6 +193,11 @@ def get_cc_pair_full_info(
        only_finished=False,
    )

+    latest_successful_attempt = get_latest_successful_index_attempt_for_cc_pair_id(
+        db_session=db_session,
+        connector_credential_pair_id=cc_pair_id,
+    )
+
    # Get latest permission sync attempt for status
    latest_permission_sync_attempt = None
    if cc_pair.access_type == AccessType.SYNC:
@@ -207,6 +215,11 @@ def get_cc_pair_full_info(
            cc_pair_id=cc_pair_id,
        ),
        last_index_attempt=latest_attempt,
+        last_successful_index_time=(
+            latest_successful_attempt.time_started
+            if latest_successful_attempt
+            else None
+        ),
        latest_deletion_attempt=get_deletion_attempt_snapshot(
            connector_id=cc_pair.connector_id,
            credential_id=cc_pair.credential_id,
--- a/backend/onyx/server/documents/connector.py
+++ b/backend/onyx/server/documents/connector.py
@@ -3,6 +3,7 @@ import math
 import mimetypes
 import os
 import zipfile
+from datetime import datetime
 from io import BytesIO
 from typing import Any
 from typing import cast
@@ -109,6 +110,9 @@ from onyx.db.federated import fetch_all_federated_connectors_parallel
 from onyx.db.index_attempt import get_index_attempts_for_cc_pair
 from onyx.db.index_attempt import get_latest_index_attempts_by_status
 from onyx.db.index_attempt import get_latest_index_attempts_parallel
+from onyx.db.index_attempt import (
+    get_latest_successful_index_attempts_parallel,
+)
 from onyx.db.models import ConnectorCredentialPair
 from onyx.db.models import FederatedConnector
 from onyx.db.models import IndexAttempt
@@ -1158,21 +1162,26 @@ def get_connector_indexing_status(
            ),
            (),
        ),
+        # Get most recent successful index attempts
+        (
+            lambda: get_latest_successful_index_attempts_parallel(
+                request.secondary_index,
+            ),
+            (),
+        ),
    ]

    if user and user.role == UserRole.ADMIN:
-        # For Admin users, we already got all the cc pair in editable_cc_pairs
-        # its not needed to get them again
        (
            editable_cc_pairs,
            federated_connectors,
            latest_index_attempts,
            latest_finished_index_attempts,
+            latest_successful_index_attempts,
        ) = run_functions_tuples_in_parallel(parallel_functions)
        non_editable_cc_pairs = []
    else:
        parallel_functions.append(
-            # Get non-editable connector/credential pairs
            (
                lambda: get_connector_credential_pairs_for_user_parallel(
                    user, False, None, True, True, False, True, request.source
@@ -1186,6 +1195,7 @@ def get_connector_indexing_status(
            federated_connectors,
            latest_index_attempts,
            latest_finished_index_attempts,
+            latest_successful_index_attempts,
            non_editable_cc_pairs,
        ) = run_functions_tuples_in_parallel(parallel_functions)

@@ -1197,6 +1207,9 @@ def get_connector_indexing_status(
    latest_finished_index_attempts = cast(
        list[IndexAttempt], latest_finished_index_attempts
    )
+    latest_successful_index_attempts = cast(
+        list[IndexAttempt], latest_successful_index_attempts
+    )

    document_count_info = get_document_counts_for_all_cc_pairs(db_session)

@@ -1206,42 +1219,48 @@ def get_connector_indexing_status(
        for connector_id, credential_id, cnt in document_count_info
    }

-    cc_pair_to_latest_index_attempt: dict[tuple[int, int], IndexAttempt] = {
-        (
-            attempt.connector_credential_pair.connector_id,
-            attempt.connector_credential_pair.credential_id,
-        ): attempt
-        for attempt in latest_index_attempts
-    }
+    def _attempt_lookup(
+        attempts: list[IndexAttempt],
+    ) -> dict[int, IndexAttempt]:
+        return {attempt.connector_credential_pair_id: attempt for attempt in attempts}

-    cc_pair_to_latest_finished_index_attempt: dict[tuple[int, int], IndexAttempt] = {
-        (
-            attempt.connector_credential_pair.connector_id,
-            attempt.connector_credential_pair.credential_id,
-        ): attempt
-        for attempt in latest_finished_index_attempts
-    }
+    cc_pair_to_latest_index_attempt = _attempt_lookup(latest_index_attempts)
+    cc_pair_to_latest_finished_index_attempt = _attempt_lookup(
+        latest_finished_index_attempts
+    )
+    cc_pair_to_latest_successful_index_attempt = _attempt_lookup(
+        latest_successful_index_attempts
+    )

    def build_connector_indexing_status(
        cc_pair: ConnectorCredentialPair,
        is_editable: bool,
    ) -> ConnectorIndexingStatusLite | None:
-        # TODO remove this to enable ingestion API
        if cc_pair.name == "DefaultCCPair":
            return None

-        latest_attempt = cc_pair_to_latest_index_attempt.get(
-            (cc_pair.connector_id, cc_pair.credential_id)
-        )
+        latest_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
        latest_finished_attempt = cc_pair_to_latest_finished_index_attempt.get(
-            (cc_pair.connector_id, cc_pair.credential_id)
+            cc_pair.id
+        )
+        latest_successful_attempt = cc_pair_to_latest_successful_index_attempt.get(
+            cc_pair.id
        )
        doc_count = cc_pair_to_document_cnt.get(
            (cc_pair.connector_id, cc_pair.credential_id), 0
        )

        return _get_connector_indexing_status_lite(
-            cc_pair, latest_attempt, latest_finished_attempt, is_editable, doc_count
+            cc_pair,
+            latest_attempt,
+            latest_finished_attempt,
+            (
+                latest_successful_attempt.time_started
+                if latest_successful_attempt
+                else None
+            ),
+            is_editable,
+            doc_count,
        )

    # Process editable cc_pairs
@@ -1402,6 +1421,7 @@ def _get_connector_indexing_status_lite(
    cc_pair: ConnectorCredentialPair,
    latest_index_attempt: IndexAttempt | None,
    latest_finished_index_attempt: IndexAttempt | None,
+    last_successful_index_time: datetime | None,
    is_editable: bool,
    document_cnt: int,
 ) -> ConnectorIndexingStatusLite | None:
@@ -1435,7 +1455,7 @@ def _get_connector_indexing_status_lite(
            else None
        ),
        last_status=latest_index_attempt.status if latest_index_attempt else None,
-        last_success=cc_pair.last_successful_index_time,
+        last_success=last_successful_index_time,
        docs_indexed=document_cnt,
        latest_index_attempt_docs_indexed=(
            latest_index_attempt.total_docs_indexed if latest_index_attempt else None
--- a/backend/onyx/server/documents/models.py
+++ b/backend/onyx/server/documents/models.py
@@ -330,6 +330,7 @@ class CCPairFullInfo(BaseModel):
        num_docs_indexed: int,  # not ideal, but this must be computed separately
        is_editable_for_current_user: bool,
        indexing: bool,
+        last_successful_index_time: datetime | None = None,
        last_permission_sync_attempt_status: PermissionSyncStatus | None = None,
        permission_syncing: bool = False,
        last_permission_sync_attempt_finished: datetime | None = None,
@@ -382,9 +383,7 @@ class CCPairFullInfo(BaseModel):
            creator_email=(
                cc_pair_model.creator.email if cc_pair_model.creator else None
            ),
-            last_indexed=(
-                last_index_attempt.time_started if last_index_attempt else None
-            ),
+            last_indexed=last_successful_index_time,
            last_pruned=cc_pair_model.last_pruned,
            last_full_permission_sync=cls._get_last_full_permission_sync(cc_pair_model),
            overall_indexing_speed=overall_indexing_speed,
--- a/backend/onyx/server/features/build/api/user_library.py
+++ b/backend/onyx/server/features/build/api/user_library.py
@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session

 from onyx.auth.users import current_user
 from onyx.background.celery.versioned_apps.client import app as celery_app
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -50,6 +52,9 @@ from onyx.db.engine.sql_engine import get_session
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.models import User
 from onyx.document_index.interfaces import DocumentMetadata
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
@@ -127,6 +132,49 @@ class DeleteFileResponse(BaseModel):
 # =============================================================================


+def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
+    """True if either the filename or the content-type indicates a PDF.
+
+    Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
+    ``Content-Type: application/octet-stream``), so we also fall back to
+    extension-based detection via ``mimetypes.guess_type`` on the filename.
+    """
+    if content_type == "application/pdf":  # exact match; anything else falls through
+        return True
+    guessed, _ = mimetypes.guess_type(filename)  # encoding half of the tuple is unused
+    return guessed == "application/pdf"
+
+
+def _check_pdf_image_caps(
+    filename: str, content: bytes, content_type: str | None, batch_total: int
+) -> int:
+    """Enforce per-file and per-batch embedded-image caps for PDFs.
+
+    ``batch_total`` is the image count already accepted in this batch. Returns
+    this file's embedded-image count (0 for non-PDFs) so callers can update their
+    running total. Raises OnyxError(INVALID_INPUT) if either cap is exceeded.
+    """
+    if not _looks_like_pdf(filename, content_type):
+        return 0
+    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
+    # Short-circuit at the larger cap so we get a useful count for both checks.
+    count = count_pdf_embedded_images(BytesIO(content), max(file_cap, batch_cap))
+    if count > file_cap:  # per-file limit is checked before the batch limit
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            f"PDF '{filename}' contains too many embedded images "
+            f"(more than {file_cap}). Try splitting the document into smaller files.",
+        )
+    if batch_total + count > batch_cap:  # this file would push the batch over its cap
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            f"Upload would exceed the {batch_cap}-image limit across all "
+            f"files in this batch. Try uploading fewer image-heavy files at once.",
+        )
+    return count
+
+
 def _sanitize_path(path: str) -> str:
    """Sanitize a file path, removing traversal attempts and normalizing.

@@ -356,6 +404,7 @@ async def upload_files(

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
+    batch_image_total = 0
    now = datetime.now(timezone.utc)

    # Sanitize the base path
@@ -375,6 +424,14 @@ async def upload_files(
                detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024*1024)}MB",
            )

+        # Reject PDFs with an unreasonable per-file or per-batch image count
+        batch_image_total += _check_pdf_image_caps(
+            filename=file.filename or "unnamed",
+            content=content,
+            content_type=file.content_type,
+            batch_total=batch_image_total,
+        )
+
        # Validate cumulative storage (existing + this upload batch)
        total_size += file_size
        if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
@@ -473,6 +530,7 @@ async def upload_zip(

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
+    batch_image_total = 0

    # Extract zip contents into a subfolder named after the zip file
    zip_name = api_sanitize_filename(file.filename or "upload")
@@ -511,6 +569,36 @@ async def upload_zip(
                    logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
                    continue

+                # Skip PDFs that would trip the per-file or per-batch image
+                # cap (would OOM the user-file-processing worker). Matches
+                # /upload behavior but uses skip-and-warn to stay consistent
+                # with the zip path's handling of oversized files.
+                zip_file_name = zip_info.filename.split("/")[-1]
+                zip_content_type, _ = mimetypes.guess_type(zip_file_name)
+                if zip_content_type == "application/pdf":
+                    image_count = count_pdf_embedded_images(
+                        BytesIO(file_content),
+                        max(
+                            MAX_EMBEDDED_IMAGES_PER_FILE,
+                            MAX_EMBEDDED_IMAGES_PER_UPLOAD,
+                        ),
+                    )
+                    if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
+                        logger.warning(
+                            "Skipping '%s' - exceeds %d per-file embedded-image cap",
+                            zip_info.filename,
+                            MAX_EMBEDDED_IMAGES_PER_FILE,
+                        )
+                        continue
+                    if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
+                        logger.warning(
+                            "Skipping '%s' - would exceed %d per-batch embedded-image cap",
+                            zip_info.filename,
+                            MAX_EMBEDDED_IMAGES_PER_UPLOAD,
+                        )
+                        continue
+                    batch_image_total += image_count
+
                total_size += file_size

                # Validate cumulative storage
--- a/backend/onyx/server/features/projects/projects_file_utils.py
+++ b/backend/onyx/server/features/projects/projects_file_utils.py
@@ -10,7 +10,10 @@ from pydantic import Field
 from sqlalchemy.orm import Session

 from onyx.configs.app_configs import FILE_TOKEN_COUNT_THRESHOLD
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
 from onyx.db.llm import fetch_default_llm_model
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_processing.extract_file_text import get_file_ext
 from onyx.file_processing.file_types import OnyxFileExtensions
@@ -156,6 +159,11 @@ def categorize_uploaded_files(
        except RuntimeError as e:
            logger.warning(f"Failed to get current tenant ID: {str(e)}")

+    # Running total of embedded images across accepted PDFs in this batch. A
+    # PDF that would push the total past MAX_EMBEDDED_IMAGES_PER_UPLOAD is
+    # rejected even if it individually fits under MAX_EMBEDDED_IMAGES_PER_FILE.
+    batch_image_total = 0
+
    for upload in files:
        try:
            filename = get_safe_filename(upload)
@@ -204,6 +212,47 @@ def categorize_uploaded_files(
                    )
                    continue

+                # Reject PDFs with an unreasonable number of embedded images
+                # (either per-file or accumulated across this upload batch).
+                # A PDF with thousands of embedded images can OOM the
+                # user-file-processing celery worker because every image is
+                # decoded with PIL and then sent to the vision LLM.
+                if extension == ".pdf":
+                    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+                    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
+                    # Use the larger of the two caps as the short-circuit
+                    # threshold so we get a useful count for both checks.
+                    # count_pdf_embedded_images restores the stream position.
+                    count = count_pdf_embedded_images(
+                        upload.file, max(file_cap, batch_cap)
+                    )
+                    if count > file_cap:
+                        results.rejected.append(
+                            RejectedFile(
+                                filename=filename,
+                                reason=(
+                                    f"PDF contains too many embedded images "
+                                    f"(more than {file_cap}). Try splitting "
+                                    f"the document into smaller files."
+                                ),
+                            )
+                        )
+                        continue
+                    if batch_image_total + count > batch_cap:
+                        results.rejected.append(
+                            RejectedFile(
+                                filename=filename,
+                                reason=(
+                                    f"Upload would exceed the "
+                                    f"{batch_cap}-image limit across all "
+                                    f"files in this batch. Try uploading "
+                                    f"fewer image-heavy files at once."
+                                ),
+                            )
+                        )
+                        continue
+                    batch_image_total += count
+
                text_content = extract_file_text(
                    file=upload.file,
                    file_name=filename,
--- a/backend/tests/integration/tests/connector/test_last_indexed_time.py
+++ b/backend/tests/integration/tests/connector/test_last_indexed_time.py
@@ -0,0 +1,237 @@
+"""
+Integration tests for the "Last Indexed" time displayed on both the
+per-connector detail page and the all-connectors listing page.
+
+Expected behavior: "Last Indexed" = time_started of the most recent
+successful index attempt for the cc pair, regardless of pagination.
+
+Edge cases:
+1. First page of index attempts is entirely errors — last_indexed should
+   still reflect the older successful attempt beyond page 1.
+2. Credential swap — successful attempts, then failures after a
+   "credential change"; last_indexed should reflect the most recent
+   successful attempt.
+3. Mix of statuses — only the most recent successful attempt matters.
+4. COMPLETED_WITH_ERRORS counts as a success for last_indexed purposes.
+"""
+
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+
+from onyx.db.models import IndexingStatus
+from onyx.server.documents.models import CCPairFullInfo
+from onyx.server.documents.models import ConnectorIndexingStatusLite
+from tests.integration.common_utils.managers.cc_pair import CCPairManager
+from tests.integration.common_utils.managers.connector import ConnectorManager
+from tests.integration.common_utils.managers.credential import CredentialManager
+from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
+from tests.integration.common_utils.managers.user import UserManager
+from tests.integration.common_utils.test_models import DATestCCPair
+from tests.integration.common_utils.test_models import DATestUser
+
+
+def _wait_for_real_success(cc_pair: DATestCCPair, admin: DATestUser) -> None:
+    """Block until indexing for ``cc_pair`` completes (timeout=120)."""
+    # Cutoff far in the past. NOTE(review): presumably lets the very first
+    # attempt satisfy the wait - confirm against the manager's semantics.
+    long_ago = datetime(2000, 1, 1, tzinfo=timezone.utc)
+    CCPairManager.wait_for_indexing_completion(
+        cc_pair,
+        after=long_ago,
+        user_performing_action=admin,
+        timeout=120,
+    )
+
+
+def _get_detail(cc_pair_id: int, admin: DATestUser) -> CCPairFullInfo:
+    detail = CCPairManager.get_single(cc_pair_id, admin)
+    assert detail is not None  # fail fast when nothing comes back
+    return detail
+
+
+def _get_listing(cc_pair_id: int, admin: DATestUser) -> ConnectorIndexingStatusLite:
+    listing = CCPairManager.get_indexing_status_by_id(cc_pair_id, admin)
+    assert listing is not None  # fail fast when nothing comes back
+    return listing
+
+
+def test_last_indexed_first_page_all_errors(reset: None) -> None:  # noqa: ARG001
+    """An older success must survive a first page made up entirely of errors.
+
+    The detail page UI uses page size 8. After the initial successful run,
+    10 failed attempts are inserted with base_time=now so that the success
+    no longer fits on page 1. Both the detail page (last_indexed) and the
+    listing page (last_success) must still be populated and must agree.
+    """
+    admin = UserManager.create(name="admin_first_page_errors")
+    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
+    _wait_for_real_success(cc_pair, admin)
+
+    # Baseline: last_success should be set from the initial successful run
+    listing_before = _get_listing(cc_pair.id, admin)
+    assert listing_before.last_success is not None
+
+    # 10 failures (more than the UI page size of 8) push the success off page 1
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=10,
+        cc_pair_id=cc_pair.id,
+        status=IndexingStatus.FAILED,
+        error_msg="simulated failure",
+        base_time=datetime.now(tz=timezone.utc),
+    )
+
+    detail = _get_detail(cc_pair.id, admin)
+    listing = _get_listing(cc_pair.id, admin)
+
+    assert (
+        detail.last_indexed is not None
+    ), "Detail page last_indexed is None even though a successful attempt exists"
+    assert (
+        listing.last_success is not None
+    ), "Listing page last_success is None even though a successful attempt exists"
+
+    # Both surfaces must report the same timestamp
+    assert detail.last_indexed == listing.last_success, (
+        f"Detail last_indexed={detail.last_indexed} != "
+        f"listing last_success={listing.last_success}"
+    )
+
+
+def test_last_indexed_credential_swap_scenario(reset: None) -> None:  # noqa: ARG001
+    """Real credential swap: create connector + cred1 (cc_pair_1), wait for
+    success, attach cred2 to the same connector (cc_pair_2), wait for that to
+    succeed, then inject failures on cc_pair_2.
+
+    Intent: cc_pair_2 reflects its own success, not cc_pair_1's older one.
+    The assertions only check presence and detail/listing agreement.
+    """
+    admin = UserManager.create(name="admin_cred_swap")
+
+    connector = ConnectorManager.create(user_performing_action=admin)
+    cred1 = CredentialManager.create(user_performing_action=admin)
+    cc_pair_1 = CCPairManager.create(
+        connector_id=connector.id,
+        credential_id=cred1.id,
+        user_performing_action=admin,
+    )
+    _wait_for_real_success(cc_pair_1, admin)
+
+    cred2 = CredentialManager.create(user_performing_action=admin, name="swapped-cred")
+    cc_pair_2 = CCPairManager.create(
+        connector_id=connector.id,
+        credential_id=cred2.id,
+        user_performing_action=admin,
+    )
+    _wait_for_real_success(cc_pair_2, admin)
+
+    listing_after_swap = _get_listing(cc_pair_2.id, admin)
+    assert listing_after_swap.last_success is not None
+
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=10,
+        cc_pair_id=cc_pair_2.id,
+        status=IndexingStatus.FAILED,
+        error_msg="credential expired",
+        base_time=datetime.now(tz=timezone.utc),
+    )
+
+    detail = _get_detail(cc_pair_2.id, admin)
+    listing = _get_listing(cc_pair_2.id, admin)
+
+    assert detail.last_indexed is not None
+    assert listing.last_success is not None
+
+    assert detail.last_indexed == listing.last_success, (
+        f"Detail last_indexed={detail.last_indexed} != "
+        f"listing last_success={listing.last_success}"
+    )
+
+
+def test_last_indexed_mixed_statuses(reset: None) -> None:  # noqa: ARG001
+    """Mix of in_progress, failed, and successful attempts. Only the most
+    recent successful attempt's time matters."""
+    admin = UserManager.create(name="admin_mixed")
+    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
+    _wait_for_real_success(cc_pair, admin)
+
+    now = datetime.now(tz=timezone.utc)
+
+    # Injected success, 5h ago (NOTE(review): likely older than the real run above)
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=1,
+        cc_pair_id=cc_pair.id,
+        status=IndexingStatus.SUCCESS,
+        base_time=now - timedelta(hours=5),
+    )
+
+    # Failures 3 hours ago
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=3,
+        cc_pair_id=cc_pair.id,
+        status=IndexingStatus.FAILED,
+        error_msg="transient failure",
+        base_time=now - timedelta(hours=3),
+    )
+
+    # In-progress 1 hour ago
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=1,
+        cc_pair_id=cc_pair.id,
+        status=IndexingStatus.IN_PROGRESS,
+        base_time=now - timedelta(hours=1),
+    )
+
+    detail = _get_detail(cc_pair.id, admin)
+    listing = _get_listing(cc_pair.id, admin)
+
+    assert detail.last_indexed is not None
+    assert listing.last_success is not None
+
+    assert detail.last_indexed == listing.last_success, (
+        f"Detail last_indexed={detail.last_indexed} != "
+        f"listing last_success={listing.last_success}"
+    )
+
+
+def test_last_indexed_completed_with_errors(reset: None) -> None:  # noqa: ARG001
+    """COMPLETED_WITH_ERRORS is treated as a successful attempt (matching
+    IndexingStatus.is_successful()). With only failures injected after it,
+    both surfaces should still report a time and agree on it."""
+    admin = UserManager.create(name="admin_completed_errors")
+    cc_pair = CCPairManager.create_from_scratch(user_performing_action=admin)
+    _wait_for_real_success(cc_pair, admin)
+
+    now = datetime.now(tz=timezone.utc)
+
+    # NOTE(review): at now-2h this may predate the real success above - confirm
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=1,
+        cc_pair_id=cc_pair.id,
+        status=IndexingStatus.COMPLETED_WITH_ERRORS,
+        base_time=now - timedelta(hours=2),
+    )
+
+    # 10 failures after — push everything else off page 1
+    IndexAttemptManager.create_test_index_attempts(
+        num_attempts=10,
+        cc_pair_id=cc_pair.id,
+        status=IndexingStatus.FAILED,
+        error_msg="post-partial failure",
+        base_time=now,
+    )
+
+    detail = _get_detail(cc_pair.id, admin)
+    listing = _get_listing(cc_pair.id, admin)
+
+    assert (
+        detail.last_indexed is not None
+    ), "COMPLETED_WITH_ERRORS should count as a success for last_indexed"
+    assert (
+        listing.last_success is not None
+    ), "COMPLETED_WITH_ERRORS should count as a success for last_success"
+
+    assert detail.last_indexed == listing.last_success, (
+        f"Detail last_indexed={detail.last_indexed} != "
+        f"listing last_success={listing.last_success}"
+    )
--- a/tools/ods/cmd/print_latest.go
+++ b/tools/ods/cmd/print_latest.go
@@ -0,0 +1,35 @@
+package cmd
+
+import (
+	"fmt"
+	"github.com/jmelahman/tag/git"
+	"github.com/spf13/cobra"
+)
+
+// NewLatestStableTagCommand builds the "latest-stable-tag" subcommand. It takes
+// no arguments and fails when the lookup errors or returns an empty tag.
+func NewLatestStableTagCommand() *cobra.Command {
+	return &cobra.Command{
+		Use:   "latest-stable-tag",
+		Short: "Print the git tag that should receive the 'latest' Docker tag",
+		Long: `Print the highest stable (non-pre-release) semver tag in the repository.
+
+This is used during deployment to decide whether a given tag should
+receive the "latest" tag on Docker Hub. Only the highest vX.Y.Z tag
+qualifies. Tags with pre-release suffixes (e.g. v1.2.3-beta,
+v1.2.3-cloud.1) are excluded.`,
+		Args: cobra.NoArgs,
+		RunE: func(c *cobra.Command, _ []string) error {
+			tag, err := git.GetLatestStableSemverTag("")
+			if err != nil {
+				return fmt.Errorf("get latest stable semver tag: %w", err)
+			}
+			if tag == "" {
+				return fmt.Errorf("no stable semver tag found in repository")
+			}
+			// Write via the command's writer so SetOut redirection is honored.
+			_, err = fmt.Fprintln(c.OutOrStdout(), tag)
+			return err
+		},
+	}
+}
--- a/tools/ods/cmd/root.go
+++ b/tools/ods/cmd/root.go
@@ -52,6 +52,7 @@ func NewRootCommand() *cobra.Command {
 	cmd.AddCommand(NewScreenshotDiffCommand())
 	cmd.AddCommand(NewDesktopCommand())
 	cmd.AddCommand(NewWebCommand())
+	cmd.AddCommand(NewLatestStableTagCommand())
 	cmd.AddCommand(NewWhoisCommand())

 	return cmd
--- a/tools/ods/go.mod
+++ b/tools/ods/go.mod
@@ -3,12 +3,13 @@ module github.com/onyx-dot-app/onyx/tools/ods
 go 1.26.0

 require (
+	github.com/jmelahman/tag v0.5.2
 	github.com/sirupsen/logrus v1.9.3
-	github.com/spf13/cobra v1.10.1
-	github.com/spf13/pflag v1.0.9
+	github.com/spf13/cobra v1.10.2
+	github.com/spf13/pflag v1.0.10
 )

 require (
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
+	golang.org/x/sys v0.39.0 // indirect
 )
--- a/tools/ods/go.sum
+++ b/tools/ods/go.sum
@@ -4,20 +4,26 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/jmelahman/tag v0.5.2 h1:g6A/aHehu5tkA31mPoDsXBNr1FigZ9A82Y8WVgb/WsM=
+github.com/jmelahman/tag v0.5.2/go.mod h1:qmuqk19B1BKkpcg3kn7l/Eey+UqucLxgOWkteUGiG4Q=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
-github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
-github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
+github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
+github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
 github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
+github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
--- a/web/src/app/admin/connector/[ccPairId]/page.tsx
+++ b/web/src/app/admin/connector/[ccPairId]/page.tsx
@@ -626,10 +626,7 @@ function Main({ ccPairId }: { ccPairId: number }) {
          <div className="w-[200px]">
            <div className="text-sm font-medium mb-1">Last Indexed</div>
            <div className="text-sm text-text-default">
-              {timeAgo(
-                indexAttempts?.find((attempt) => attempt.status === "success")
-                  ?.time_started
-              ) ?? "-"}
+              {timeAgo(ccPair?.last_indexed) ?? "-"}
            </div>
          </div>

--- a/web/src/app/craft/components/InputBar.tsx
+++ b/web/src/app/craft/components/InputBar.tsx
@@ -13,6 +13,7 @@ import {
  type KeyboardEvent,
 } from "react";
 import { useRouter } from "next/navigation";
+import { getPastedFilesIfNoText } from "@/lib/clipboard";
 import { cn, isImageFile } from "@/lib/utils";
 import { Disabled } from "@opal/core";
 import {
@@ -230,21 +231,11 @@ const InputBar = memo(

      const handlePaste = useCallback(
        (event: ClipboardEvent) => {
-          const items = event.clipboardData?.items;
-          if (items) {
-            const pastedFiles: File[] = [];
-            for (let i = 0; i < items.length; i++) {
-              const item = items[i];
-              if (item && item.kind === "file") {
-                const file = item.getAsFile();
-                if (file) pastedFiles.push(file);
-              }
-            }
-            if (pastedFiles.length > 0) {
-              event.preventDefault();
-              // Context handles session binding internally
-              uploadFiles(pastedFiles);
-            }
+          const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
+          if (pastedFiles.length > 0) {
+            event.preventDefault();
+            // Context handles session binding internally
+            uploadFiles(pastedFiles);
          }
        },
        [uploadFiles]
--- a/web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
+++ b/web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
@@ -272,6 +272,22 @@ export default function UserLibraryModal({
                  </Disabled>
                </Section>

+                {/* The exact cap is controlled by the backend env var
+                    MAX_EMBEDDED_IMAGES_PER_FILE (default 500). This copy is
+                    deliberately vague so it doesn't drift if the limit is
+                    tuned per-deployment; the precise number is surfaced in
+                    the rejection error the server returns. */}
+                <Section
+                  flexDirection="row"
+                  justifyContent="end"
+                  padding={0.5}
+                  height="fit"
+                >
+                  <Text secondaryBody text03>
+                    PDFs with many embedded images may be rejected.
+                  </Text>
+                </Section>
+
                {isLoading ? (
                  <Section padding={2} height="fit">
                    <Text secondaryBody text03>
--- a/web/src/app/nrf/NRFPage.tsx
+++ b/web/src/app/nrf/NRFPage.tsx
@@ -295,7 +295,6 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
      // If we already have messages (chat session started), always use chat mode
      // (matches AppPage behavior where existing sessions bypass classification)
      if (hasMessages) {
-        resetInputBar();
        onSubmit({
          message: submittedMessage,
          currentMessageFiles: currentMessageFiles,
@@ -307,7 +306,6 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {

      // Build an onChat closure that captures additionalContext for this submission
      const onChat = (chatMessage: string) => {
-        resetInputBar();
        onSubmit({
          message: chatMessage,
          currentMessageFiles: currentMessageFiles,
@@ -326,7 +324,6 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
      onSubmit,
      currentMessageFiles,
      deepResearchEnabled,
-      resetInputBar,
      submitQuery,
      tabReadingEnabled,
      currentTabUrl,
--- a/web/src/components/dateRangeSelectors/AdminDateRangeSelector.tsx
+++ b/web/src/components/dateRangeSelectors/AdminDateRangeSelector.tsx
@@ -52,6 +52,7 @@ export const AdminDateRangeSelector = memo(function AdminDateRangeSelector({
        <Popover.Trigger asChild>
          {/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
          <Button
+            data-testid="admin-date-range-selector-button"
            secondary
            className={cn("justify-start", !value && "text-muted-foreground")}
            leftIcon={SvgCalendar}
--- a/web/src/lib/clipboard.test.ts
+++ b/web/src/lib/clipboard.test.ts
@@ -0,0 +1,89 @@
+import { getPastedFilesIfNoText } from "./clipboard";
+
+type MockClipboardData = Parameters<typeof getPastedFilesIfNoText>[0];
+
+// Builds a minimal clipboard stand-in: one "file" item per File plus canned
+// getData() answers for the "text/plain" and "text" formats.
+function makeClipboardData({
+  textPlain = "",
+  text = "",
+  files = [],
+}: {
+  textPlain?: string;
+  text?: string;
+  files?: File[];
+}): MockClipboardData {
+  // Text payloads keyed by the clipboard format they are served under.
+  const textByFormat = new Map<string, string>([
+    ["text/plain", textPlain],
+    ["text", text],
+  ]);
+
+  return {
+    // Each File is exposed as a "file"-kind clipboard item.
+    items: files.map((file) => ({
+      kind: "file",
+      getAsFile: () => file,
+    })),
+    // Any other format yields an empty string.
+    getData: (format: string) => textByFormat.get(format) ?? "",
+  };
+}
+
+describe("getPastedFilesIfNoText", () => {
+  // Fresh PNG stand-in for the rendered preview an app may put on the
+  // clipboard next to the copied text.
+  function makeSlideImage(): File {
+    return new File(["slide preview"], "slide.png", {
+      type: "image/png",
+    });
+  }
+
+  it("prefers plain text over pasted files when both are present", () => {
+    const clipboard = makeClipboardData({
+      textPlain: "Welcome to PowerPoint for Mac",
+      files: [makeSlideImage()],
+    });
+    const pasted = getPastedFilesIfNoText(clipboard);
+
+    // Text wins: with a text/plain payload present, no file is reported
+    // back to the caller.
+    expect(pasted).toEqual([]);
+  });
+
+  it("falls back to text data when text/plain is empty", () => {
+    const clipboard = makeClipboardData({
+      text: "Welcome to PowerPoint for Mac",
+      files: [makeSlideImage()],
+    });
+    const pasted = getPastedFilesIfNoText(clipboard);
+
+    // The bare "text" format alone is enough to suppress the files when
+    // "text/plain" is empty.
+    expect(pasted).toEqual([]);
+  });
+
+  it("still returns files for image-only pastes", () => {
+    const slideImage = makeSlideImage();
+    const clipboard = makeClipboardData({ files: [slideImage] });
+    const pasted = getPastedFilesIfNoText(clipboard);
+
+    // Both text formats default to "" in the mock, so the file is
+    // returned as-is.
+    expect(pasted).toEqual([slideImage]);
+  });
+
+  it("ignores whitespace-only text and keeps file pastes working", () => {
+    const slideImage = makeSlideImage();
+    const clipboard = makeClipboardData({
+      textPlain: "   ",
+      text: "\n",
+      files: [slideImage],
+    });
+    const pasted = getPastedFilesIfNoText(clipboard);
+
+    // Whitespace-only payloads in both formats must not suppress the
+    // file.
+    expect(pasted).toEqual([slideImage]);
+  });
+});
--- a/web/src/lib/clipboard.ts
+++ b/web/src/lib/clipboard.ts
@@ -0,0 +1,52 @@
+// Structural stand-ins for DataTransferItem / DataTransfer. Only the members
+// read below are declared, so a real clipboardData or a plain object both fit.
+type ClipboardFileItem = {
+  kind: string;
+  getAsFile: () => File | null;
+};
+
+type ClipboardDataLike = {
+  items?: ArrayLike<ClipboardFileItem> | null;
+  getData: (format: string) => string;
+};
+
+// Best-effort read of one text format: a throwing getData() counts as "".
+function getClipboardText(
+  clipboardData: ClipboardDataLike,
+  format: "text/plain" | "text"
+): string {
+  try {
+    return clipboardData.getData(format);
+  } catch {
+    return "";
+  }
+}
+
+/**
+ * Returns the files carried by a paste, or [] when the clipboard also holds
+ * non-whitespace text, or when there is no clipboard data or item list.
+ */
+export function getPastedFilesIfNoText(
+  clipboardData?: ClipboardDataLike | null
+): File[] {
+  if (!clipboardData) {
+    return [];
+  }
+
+  // Apps like PowerPoint on macOS can place both rendered image data and the
+  // original text on the clipboard. Prefer letting the textarea consume text.
+  const primaryText = getClipboardText(clipboardData, "text/plain").trim();
+  const secondaryText = getClipboardText(clipboardData, "text").trim();
+  const hasText = primaryText !== "" || secondaryText !== "";
+  if (hasText || !clipboardData.items) {
+    return [];
+  }
+
+  // Keep only "file"-kind items that actually yield a File.
+  const files: File[] = [];
+  for (const item of Array.from(clipboardData.items)) {
+    const file = item?.kind === "file" ? item.getAsFile() : null;
+    if (file) files.push(file);
+  }
+  return files;
+}
--- a/web/src/refresh-pages/AgentEditorPage.tsx
+++ b/web/src/refresh-pages/AgentEditorPage.tsx
@@ -967,6 +967,14 @@ export default function AgentEditorPage({
          validateOnChange
          validateOnBlur
          validateOnMount
+          initialTouched={{
+            description:
+              initialValues.description.length >
+              MAX_CHARACTERS_AGENT_DESCRIPTION,
+            starter_messages: initialValues.starter_messages.map(
+              (msg) => msg.length > MAX_CHARACTERS_STARTER_MESSAGE
+            ) as unknown as boolean,
+          }}
          initialStatus={{ warnings: {} }}
        >
          {({ isSubmitting, isValid, dirty, values, setFieldValue }) => {
@@ -1201,18 +1209,33 @@ export default function AgentEditorPage({
                          >
                            Cancel
                          </OpalButton>
-                          <Disabled
-                            disabled={
-                              isSubmitting ||
-                              !isValid ||
-                              !dirty ||
-                              hasUploadingFiles
+                          <SimpleTooltip
+                            tooltip={
+                              isSubmitting
+                                ? "Saving changes..."
+                                : !isValid
+                                  ? "Please fix the errors in the form before saving."
+                                  : !dirty
+                                    ? "No changes have been made."
+                                    : hasUploadingFiles
+                                      ? "Please wait for files to finish uploading."
+                                      : undefined
                            }
+                            side="bottom"
                          >
-                            <OpalButton type="submit">
-                              {existingAgent ? "Save" : "Create"}
-                            </OpalButton>
-                          </Disabled>
+                            <Disabled
+                              disabled={
+                                isSubmitting ||
+                                !isValid ||
+                                !dirty ||
+                                hasUploadingFiles
+                              }
+                            >
+                              <OpalButton type="submit">
+                                {existingAgent ? "Save" : "Create"}
+                              </OpalButton>
+                            </Disabled>
+                          </SimpleTooltip>
                        </div>
                      }
                      backButton
--- a/web/src/refresh-pages/AppPage.tsx
+++ b/web/src/refresh-pages/AppPage.tsx
@@ -465,7 +465,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {

  const onChat = useCallback(
    (message: string) => {
-      resetInputBar();
      onSubmit({
        message,
        currentMessageFiles,
@@ -476,7 +475,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
      }
    },
    [
-      resetInputBar,
      onSubmit,
      currentMessageFiles,
      deepResearchEnabledForCurrentWorkflow,
@@ -510,7 +508,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
      // If we're in an existing chat session, always use chat mode
      // (appMode only applies to new sessions)
      if (currentChatSessionId) {
-        resetInputBar();
        onSubmit({
          message,
          currentMessageFiles,
@@ -523,7 +520,7 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
      }

      // For new sessions, let the query controller handle routing.
-      // resetInputBar is called inside onChat for chat-routed queries.
+      // resetInputBar is called inside useChatController.onSubmit for chat-routed queries.
      // For search-routed queries, the input bar is intentionally kept
      // so the user can see and refine their search query.
      await submitQuery(message, onChat);
@@ -532,7 +529,6 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
      currentChatSessionId,
      submitQuery,
      onChat,
-      resetInputBar,
      onSubmit,
      currentMessageFiles,
      deepResearchEnabledForCurrentWorkflow,
--- a/web/src/refresh-pages/admin/ChatPreferencesPage.tsx
+++ b/web/src/refresh-pages/admin/ChatPreferencesPage.tsx
@@ -114,6 +114,10 @@ function MCPServerCard({
  const allToolIds = tools.map((t) => t.id);
  const serverEnabled =
    tools.length > 0 && tools.some((t) => isToolEnabled(t.id));
+  const needsAuth = !server.is_authenticated;
+  const authTooltip = needsAuth
+    ? "Authenticate this MCP server before enabling its tools."
+    : undefined;

  return (
    <ExpandableCard.Root isFolded={isFolded} onFoldedChange={setIsFolded}>
@@ -122,10 +126,13 @@ function MCPServerCard({
        description={server.description}
        icon={getActionIcon(server.server_url, server.name)}
        rightChildren={
-          <Switch
-            checked={serverEnabled}
-            onCheckedChange={(checked) => onToggleTools(allToolIds, checked)}
-          />
+          <SimpleTooltip tooltip={authTooltip} side="top">
+            <Switch
+              checked={serverEnabled}
+              onCheckedChange={(checked) => onToggleTools(allToolIds, checked)}
+              disabled={needsAuth}
+            />
+          </SimpleTooltip>
        }
      >
        {tools.length > 0 && (
@@ -158,12 +165,15 @@ function MCPServerCard({
                description={tool.description}
                icon={tool.icon}
                rightChildren={
-                  <Switch
-                    checked={isToolEnabled(tool.id)}
-                    onCheckedChange={(checked) =>
-                      onToggleTool(tool.id, checked)
-                    }
-                  />
+                  <SimpleTooltip tooltip={authTooltip} side="top">
+                    <Switch
+                      checked={isToolEnabled(tool.id)}
+                      onCheckedChange={(checked) =>
+                        onToggleTool(tool.id, checked)
+                      }
+                      disabled={needsAuth}
+                    />
+                  </SimpleTooltip>
                }
              />
            ))}
--- a/web/src/sections/input/AppInputBar.tsx
+++ b/web/src/sections/input/AppInputBar.tsx
@@ -21,6 +21,7 @@ import { ChatState } from "@/app/app/interfaces";
 import { useForcedTools } from "@/lib/hooks/useForcedTools";
 import { useAppMode } from "@/providers/AppModeProvider";
 import useAppFocus from "@/hooks/useAppFocus";
+import { getPastedFilesIfNoText } from "@/lib/clipboard";
 import { cn, isImageFile } from "@/lib/utils";
 import { Disabled } from "@opal/core";
 import { useUser } from "@/providers/UserProvider";
@@ -233,20 +234,10 @@ const AppInputBar = React.memo(
    }, [showFiles, currentMessageFiles]);

    function handlePaste(event: React.ClipboardEvent) {
-      const items = event.clipboardData?.items;
-      if (items) {
-        const pastedFiles = [];
-        for (let i = 0; i < items.length; i++) {
-          const item = items[i];
-          if (item && item.kind === "file") {
-            const file = item.getAsFile();
-            if (file) pastedFiles.push(file);
-          }
-        }
-        if (pastedFiles.length > 0) {
-          event.preventDefault();
-          handleFileUpload(pastedFiles);
-        }
+      const pastedFiles = getPastedFilesIfNoText(event.clipboardData);
+      if (pastedFiles.length > 0) {
+        event.preventDefault();
+        handleFileUpload(pastedFiles);
      }
    }

--- a/web/tests/e2e/admin/admin_pages.spec.ts
+++ b/web/tests/e2e/admin/admin_pages.spec.ts
@@ -187,7 +187,10 @@ for (const theme of THEMES) {
          /\//g,
          "-"
        )}`;
-        await expectScreenshot(page, { name: screenshotName });
+        await expectScreenshot(page, {
+          name: screenshotName,
+          mask: ['[data-testid="admin-date-range-selector-button"]'],
+        });
      });
    }
  });
Author	SHA1	Message	Date
Justin Tahara	b61109a747	fix(image): Cap Uploaded File Image Count (#10298 )	2026-04-16 21:36:45 -07:00
Jamison Lahman	78459fb3e7	Revert "chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898 ) to release v3.0" (#9909 )	2026-04-03 18:32:06 -07:00
Jamison Lahman	e243d7955b	chore(deps): bump litellm from 1.81.6 to 1.83.0 (#9898 ) to release v3.0 (#9903 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-04-03 16:02:05 -07:00
Wenxi	77f5411bf7	fix(ci): tag web-server and model-server with craft-latest (#9661 ) Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-03-26 11:00:49 -07:00
Wenxi	c45caf1f1d	refactor: use ods latest-stable-tag to tag images in Docker Hub (#9281 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-03-26 11:00:49 -07:00
Wenxi	4f534249d6	refactor: sync craft latest builds with latest stable (#9279 )	2026-03-26 11:00:49 -07:00
Wenxi	eb87d88b89	feat(ods): use release-tag to print highest stable semver that should receive the `latest` tag (#9278 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-03-26 11:00:49 -07:00
github-actions[bot]	4fd6786ce2	fix(chat): dont clear input message after errors submitting (#9624 ) to release v3.0 (#9626 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-25 12:04:20 -07:00
github-actions[bot]	6919afe022	fix(ux): disable MCP Tools toggle if needs authenticated (#9607 ) to release v3.0 (#9608 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-24 15:54:52 -07:00
Justin Tahara	c4ac0fd286	fix(ui): Text focused paste from PowerPoint (#9603 )	2026-03-24 14:31:19 -07:00
github-actions[bot]	d2f8e38e67	chore(playwright): mask date switcher in screenshots (#9584 ) to release v3.0 (#9585 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-23 18:45:09 -07:00
github-actions[bot]	bbd57c5904	fix(ux): display invalid agent fields on load (#9582 ) to release v3.0 (#9583 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-23 17:29:22 -07:00
github-actions[bot]	546d5cd384	fix(ux): give a tooltip with reason agent edit cannot save (#9571 ) to release v3.0 (#9572 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-23 13:54:01 -07:00
Evan Lohn	f902f49483	fix: last index time consistency (#9546 )	2026-03-23 10:35:20 -07:00
Justin Tahara	ed3630e248	feat(backend): Adding procps (#9509 )	2026-03-19 16:34:58 -07:00