feat(connectors): convert Gong connector from poll to checkpointed (#10258) to release v3.2 (#10359)

fix(metrics): Adding in hostname (#10335)
fix(deps): install transitive vertexai dependency (#10328) to release v3.2 (#10332)
2026-04-20 17:06:43 +00:00 · 2026-04-20 09:40:52 -07:00 · 2026-04-17 13:39:48 -07:00 · 2026-04-17 12:17:25 -07:00 · 2026-04-17 09:02:48 -07:00 · 2026-04-17 08:27:31 -07:00
48 changed files with 2630 additions and 923 deletions
--- a/backend/onyx/background/celery/apps/app_base.py
+++ b/backend/onyx/background/celery/apps/app_base.py
@@ -10,6 +10,7 @@ from celery import bootsteps  # type: ignore
 from celery import Task
 from celery.app import trace
 from celery.exceptions import WorkerShutdown
+from celery.signals import before_task_publish
 from celery.signals import task_postrun
 from celery.signals import task_prerun
 from celery.states import READY_STATES
@@ -94,6 +95,17 @@ class TenantAwareTask(Task):
            CURRENT_TENANT_ID_CONTEXTVAR.set(None)


+@before_task_publish.connect
+def on_before_task_publish(
+    headers: dict[str, Any] | None = None,
+    **kwargs: Any,  # noqa: ARG001
+) -> None:
+    """Stamp the current wall-clock time into the task message headers so that
+    workers can compute queue wait time (time between publish and execution)."""
+    if headers is not None:
+        headers["enqueued_at"] = time.time()
+
+
@task_prerun.connect
 def on_task_prerun(
    sender: Any | None = None,  # noqa: ARG001
--- a/backend/onyx/background/celery/apps/light.py
+++ b/backend/onyx/background/celery/apps/light.py
@@ -16,6 +16,12 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
 from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
 from onyx.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME
 from onyx.db.engine.sql_engine import SqlEngine
+from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
+from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
+from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
+from onyx.server.metrics.metrics_server import start_metrics_server
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT

@@ -36,6 +42,7 @@ def on_task_prerun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
+    on_celery_task_prerun(task_id, task)


@signals.task_postrun.connect
@@ -50,6 +57,31 @@ def on_task_postrun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
+    on_celery_task_postrun(task_id, task, state)
+
+
+@signals.task_retry.connect
+def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    task_id = getattr(getattr(sender, "request", None), "id", None)
+    on_celery_task_retry(task_id, sender)
+
+
+@signals.task_revoked.connect
+def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
+    task_name = getattr(sender, "name", None) or str(sender)
+    on_celery_task_revoked(kwargs.get("task_id"), task_name)
+
+
+@signals.task_rejected.connect
+def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    message = kwargs.get("message")
+    task_name: str | None = None
+    if message is not None:
+        headers = getattr(message, "headers", None) or {}
+        task_name = headers.get("task")
+    if task_name is None:
+        task_name = "unknown"
+    on_celery_task_rejected(None, task_name)


@celeryd_init.connect
@@ -90,6 +122,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:

@worker_ready.connect
 def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    start_metrics_server("light")
    app_base.on_worker_ready(sender, **kwargs)


--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -38,6 +38,12 @@ from onyx.redis.redis_connector_stop import RedisConnectorStop
 from onyx.redis.redis_document_set import RedisDocumentSet
 from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_usergroup import RedisUserGroup
+from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
+from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
+from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
+from onyx.server.metrics.metrics_server import start_metrics_server
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
@@ -59,6 +65,7 @@ def on_task_prerun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
+    on_celery_task_prerun(task_id, task)


@signals.task_postrun.connect
@@ -73,6 +80,31 @@ def on_task_postrun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
+    on_celery_task_postrun(task_id, task, state)
+
+
+@signals.task_retry.connect
+def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    task_id = getattr(getattr(sender, "request", None), "id", None)
+    on_celery_task_retry(task_id, sender)
+
+
+@signals.task_revoked.connect
+def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
+    task_name = getattr(sender, "name", None) or str(sender)
+    on_celery_task_revoked(kwargs.get("task_id"), task_name)
+
+
+@signals.task_rejected.connect
+def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    message = kwargs.get("message")
+    task_name: str | None = None
+    if message is not None:
+        headers = getattr(message, "headers", None) or {}
+        task_name = headers.get("task")
+    if task_name is None:
+        task_name = "unknown"
+    on_celery_task_rejected(None, task_name)


@celeryd_init.connect
@@ -212,6 +244,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:

@worker_ready.connect
 def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    start_metrics_server("primary")
    app_base.on_worker_ready(sender, **kwargs)


--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -59,6 +59,11 @@ from onyx.redis.redis_connector_delete import RedisConnectorDelete
 from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
 from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_pool import get_redis_replica_client
+from onyx.server.metrics.deletion_metrics import inc_deletion_blocked
+from onyx.server.metrics.deletion_metrics import inc_deletion_completed
+from onyx.server.metrics.deletion_metrics import inc_deletion_fence_reset
+from onyx.server.metrics.deletion_metrics import inc_deletion_started
+from onyx.server.metrics.deletion_metrics import observe_deletion_taskset_duration
 from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
 )
@@ -300,6 +305,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
                recent_index_attempts
                and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS
            ):
+                inc_deletion_blocked(tenant_id, "indexing")
                raise TaskDependencyError(
                    "Connector deletion - Delayed (indexing in progress): "
                    f"cc_pair={cc_pair_id} "
@@ -307,11 +313,13 @@ def try_generate_document_cc_pair_cleanup_tasks(
                )

        if redis_connector.prune.fenced:
+            inc_deletion_blocked(tenant_id, "pruning")
            raise TaskDependencyError(
                f"Connector deletion - Delayed (pruning in progress): cc_pair={cc_pair_id}"
            )

        if redis_connector.permissions.fenced:
+            inc_deletion_blocked(tenant_id, "permissions")
            raise TaskDependencyError(
                f"Connector deletion - Delayed (permissions in progress): cc_pair={cc_pair_id}"
            )
@@ -359,6 +367,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
        # set this only after all tasks have been added
        fence_payload.num_tasks = tasks_generated
        redis_connector.delete.set_fence(fence_payload)
+        inc_deletion_started(tenant_id)

    return tasks_generated

@@ -523,6 +532,12 @@ def monitor_connector_deletion_taskset(
                num_docs_synced=fence_data.num_tasks,
            )

+            duration = (
+                datetime.now(timezone.utc) - fence_data.submitted
+            ).total_seconds()
+            observe_deletion_taskset_duration(tenant_id, "success", duration)
+            inc_deletion_completed(tenant_id, "success")
+
        except Exception as e:
            db_session.rollback()
            stack_trace = traceback.format_exc()
@@ -541,6 +556,11 @@ def monitor_connector_deletion_taskset(
                f"Connector deletion exceptioned: "
                f"cc_pair={cc_pair_id} connector={connector_id_to_delete} credential={credential_id_to_delete}"
            )
+            duration = (
+                datetime.now(timezone.utc) - fence_data.submitted
+            ).total_seconds()
+            observe_deletion_taskset_duration(tenant_id, "failure", duration)
+            inc_deletion_completed(tenant_id, "failure")
            raise e

    task_logger.info(
@@ -717,5 +737,6 @@ def validate_connector_deletion_fence(
        f"fence={fence_key}"
    )

+    inc_deletion_fence_reset(tenant_id)
    redis_connector.delete.reset()
    return
--- a/backend/onyx/background/celery/tasks/docfetching/tasks.py
+++ b/backend/onyx/background/celery/tasks/docfetching/tasks.py
@@ -34,6 +34,7 @@ from onyx.db.index_attempt import mark_attempt_canceled
 from onyx.db.index_attempt import mark_attempt_failed
 from onyx.db.indexing_coordination import IndexingCoordination
 from onyx.redis.redis_connector import RedisConnector
+from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
 from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import global_version
 from shared_configs.configs import SENTRY_DSN
@@ -467,6 +468,15 @@ def docfetching_proxy_task(
                index_attempt.connector_credential_pair.connector.source.value
            )

+            cc_pair = index_attempt.connector_credential_pair
+            on_index_attempt_status_change(
+                tenant_id=tenant_id,
+                source=result.connector_source,
+                cc_pair_id=cc_pair_id,
+                connector_name=cc_pair.connector.name or f"cc_pair_{cc_pair_id}",
+                status="in_progress",
+            )
+
        while True:
            sleep(5)

--- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py
+++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py
@@ -105,6 +105,9 @@ from onyx.redis.redis_pool import get_redis_replica_client
 from onyx.redis.redis_pool import redis_lock_dump
 from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
 from onyx.redis.redis_utils import is_fence
+from onyx.server.metrics.connector_health_metrics import on_connector_error_state_change
+from onyx.server.metrics.connector_health_metrics import on_connector_indexing_success
+from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
 from onyx.server.runtime.onyx_runtime import OnyxRuntime
 from onyx.utils.logger import setup_logger
 from onyx.utils.middleware import make_randomized_onyx_request_id
@@ -400,7 +403,6 @@ def check_indexing_completion(
    tenant_id: str,
    task: Task,
 ) -> None:
-
    logger.info(
        f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
    )
@@ -521,13 +523,25 @@ def check_indexing_completion(

        # Update CC pair status if successful
        cc_pair = get_connector_credential_pair_from_id(
-            db_session, attempt.connector_credential_pair_id
+            db_session,
+            attempt.connector_credential_pair_id,
+            eager_load_connector=True,
        )
        if cc_pair is None:
            raise RuntimeError(
                f"CC pair {attempt.connector_credential_pair_id} not found in database"
            )

+        source = cc_pair.connector.source.value
+        connector_name = cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
+        on_index_attempt_status_change(
+            tenant_id=tenant_id,
+            source=source,
+            cc_pair_id=cc_pair.id,
+            connector_name=connector_name,
+            status=attempt.status.value,
+        )
+
        if attempt.status.is_successful():
            # NOTE: we define the last successful index time as the time the last successful
            # attempt finished. This is distinct from the poll_range_end of the last successful
@@ -548,10 +562,26 @@ def check_indexing_completion(
                event=MilestoneRecordType.CONNECTOR_SUCCEEDED,
            )

+            on_connector_indexing_success(
+                tenant_id=tenant_id,
+                source=source,
+                cc_pair_id=cc_pair.id,
+                connector_name=connector_name,
+                docs_indexed=attempt.new_docs_indexed or 0,
+                success_timestamp=attempt.time_updated.timestamp(),
+            )
+
            # Clear repeated error state on success
            if cc_pair.in_repeated_error_state:
                cc_pair.in_repeated_error_state = False
                db_session.commit()
+                on_connector_error_state_change(
+                    tenant_id=tenant_id,
+                    source=source,
+                    cc_pair_id=cc_pair.id,
+                    connector_name=connector_name,
+                    in_error=False,
+                )

            if attempt.status == IndexingStatus.SUCCESS:
                logger.info(
@@ -848,6 +878,16 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
                        cc_pair_id=cc_pair_id,
                        in_repeated_error_state=True,
                    )
+                    error_connector_name = (
+                        cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
+                    )
+                    on_connector_error_state_change(
+                        tenant_id=tenant_id,
+                        source=cc_pair.connector.source.value,
+                        cc_pair_id=cc_pair_id,
+                        connector_name=error_connector_name,
+                        in_error=True,
+                    )
                    # When entering repeated error state, also pause the connector
                    # to prevent continued indexing retry attempts burning through embedding credits.
                    # NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -840,6 +840,29 @@ MAX_FILE_SIZE_BYTES = int(
    os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
 )  # 2GB in bytes

+# Maximum embedded images allowed in a single file. PDFs (and other formats)
+# with thousands of embedded images can OOM the user-file-processing worker
+# because every image is decoded with PIL and then sent to the vision LLM.
+# Enforced both at upload time (rejects the file) and during extraction
+# (defense-in-depth: caps the number of images materialized).
+#
+# Clamped to >= 0; a negative env value would turn upload validation into
+# always-fail and extraction into always-stop, which is never desired. 0
+# disables image extraction entirely, which is a valid (if aggressive) setting.
+MAX_EMBEDDED_IMAGES_PER_FILE = max(
+    0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500)
+)
+
+# Maximum embedded images allowed across all files in a single upload batch.
+# Protects against the scenario where a user uploads many files that each
+# fall under MAX_EMBEDDED_IMAGES_PER_FILE but aggregate to enough work
+# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
+# worker under concurrency or run up surprise latency/cost. Also clamped
+# to >= 0.
+MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
+    0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000)
+)
+
 # Use document summary for contextual rag
 USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
 # Use chunk summary for contextual rag
--- a/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
+++ b/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
@@ -3,6 +3,7 @@ from collections.abc import Callable
 from collections.abc import Iterator
 from datetime import datetime
 from datetime import timezone
+from email.utils import parsedate_to_datetime
 from typing import Any
 from typing import TypeVar
 from urllib.parse import urljoin
@@ -10,7 +11,6 @@ from urllib.parse import urlparse

 import requests
 from dateutil.parser import parse
-from dateutil.parser import ParserError

 from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
 from onyx.configs.constants import DocumentSource
@@ -56,18 +56,16 @@ def time_str_to_utc(datetime_str: str) -> datetime:
            if fixed not in candidates:
                candidates.append(fixed)

-    last_exception: Exception | None = None
-    for candidate in candidates:
-        try:
-            dt = parse(candidate)
-            return datetime_to_utc(dt)
-        except (ValueError, ParserError) as exc:
-            last_exception = exc
+    # dateutil is the primary parser; the stdlib RFC 2822 parser is a fallback
+    # for inputs dateutil rejects (e.g. headers concatenated without a CRLF).
+    # For such inputs the TZ may be dropped; datetime_to_utc then assumes UTC.
+    for parser in (parse, parsedate_to_datetime):
+        for candidate in candidates:
+            try:
+                return datetime_to_utc(parser(candidate))
+            except (TypeError, ValueError, OverflowError):
+                continue

-    if last_exception is not None:
-        raise last_exception
-
-    # Fallback in case parsing failed without raising (should not happen)
    raise ValueError(f"Unable to parse datetime string: {datetime_str}")


--- a/backend/onyx/connectors/gmail/connector.py
+++ b/backend/onyx/connectors/gmail/connector.py
@@ -253,7 +253,17 @@ def thread_to_document(

    updated_at_datetime = None
    if updated_at:
-        updated_at_datetime = time_str_to_utc(updated_at)
+        try:
+            updated_at_datetime = time_str_to_utc(updated_at)
+        except (ValueError, OverflowError) as e:
+            # Old mailboxes contain RFC-violating Date headers. Drop the
+            # timestamp instead of aborting the indexing run.
+            logger.warning(
+                "Skipping unparseable Gmail Date header on thread %s: %r (%s)",
+                full_thread.get("id"),
+                updated_at,
+                e,
+            )

    id = full_thread.get("id")
    if not id:
--- a/backend/onyx/connectors/gong/connector.py
+++ b/backend/onyx/connectors/gong/connector.py
@@ -1,4 +1,5 @@
 import base64
+import copy
 import time
 from collections.abc import Generator
 from datetime import datetime
@@ -8,27 +9,58 @@ from typing import Any
 from typing import cast

 import requests
+from pydantic import BaseModel
 from requests.adapters import HTTPAdapter
 from urllib3.util import Retry

-from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
 from onyx.configs.app_configs import GONG_CONNECTOR_START_TIME
-from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
-from onyx.connectors.interfaces import GenerateDocumentsOutput
-from onyx.connectors.interfaces import LoadConnector
-from onyx.connectors.interfaces import PollConnector
+from onyx.connectors.interfaces import CheckpointedConnector
+from onyx.connectors.interfaces import CheckpointOutput
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.models import ConnectorCheckpoint
+from onyx.connectors.models import ConnectorFailure
 from onyx.connectors.models import ConnectorMissingCredentialError
 from onyx.connectors.models import Document
-from onyx.connectors.models import HierarchyNode
+from onyx.connectors.models import DocumentFailure
 from onyx.connectors.models import TextSection
 from onyx.utils.logger import setup_logger

 logger = setup_logger()


-class GongConnector(LoadConnector, PollConnector):
+class GongConnectorCheckpoint(ConnectorCheckpoint):
+    # Resolved workspace IDs to iterate through.
+    # None means "not yet resolved" — first checkpoint call resolves them.
+    # Inner None means "no workspace filter" (fetch all).
+    workspace_ids: list[str | None] | None = None
+    # Index into workspace_ids for current workspace
+    workspace_index: int = 0
+    # Gong API cursor for current workspace's transcript pagination
+    cursor: str | None = None
+    # Cached time range — computed once, reused across checkpoint calls
+    time_range: tuple[str, str] | None = None
+
+
+class _TranscriptPage(BaseModel):
+    """One page of transcripts from /v2/calls/transcript."""
+
+    transcripts: list[dict[str, Any]]
+    next_cursor: str | None = None
+
+
+class _CursorExpiredError(Exception):
+    """Raised when Gong rejects a pagination cursor as expired.
+
+    A Gong pagination cursor's TTL is ~1 hour from the first request in a
+    pagination sequence, not from the last cursor fetch. Since checkpointed
+    connector runs can pause between invocations, a resumed run may encounter
+    an expired cursor and must restart the current workspace from scratch.
+    See https://visioneers.gong.io/integrations-77/pagination-cursor-expires-after-1-hours-even-for-a-new-cursor-1382
+    """
+
+
+class GongConnector(CheckpointedConnector[GongConnectorCheckpoint]):
    BASE_URL = "https://api.gong.io"
    MAX_CALL_DETAILS_ATTEMPTS = 6
    CALL_DETAILS_DELAY = 30  # in seconds
@@ -38,13 +70,9 @@ class GongConnector(LoadConnector, PollConnector):
    def __init__(
        self,
        workspaces: list[str] | None = None,
-        batch_size: int = INDEX_BATCH_SIZE,
-        continue_on_fail: bool = CONTINUE_ON_CONNECTOR_FAILURE,
        hide_user_info: bool = False,
    ) -> None:
        self.workspaces = workspaces
-        self.batch_size: int = batch_size
-        self.continue_on_fail = continue_on_fail
        self.auth_token_basic: str | None = None
        self.hide_user_info = hide_user_info
        self._last_request_time: float = 0.0
@@ -98,67 +126,50 @@ class GongConnector(LoadConnector, PollConnector):
        # Then the user input is treated as the name
        return {**id_id_map, **name_id_map}

-    def _get_transcript_batches(
-        self, start_datetime: str | None = None, end_datetime: str | None = None
-    ) -> Generator[list[dict[str, Any]], None, None]:
-        body: dict[str, dict] = {"filter": {}}
+    def _fetch_transcript_page(
+        self,
+        start_datetime: str | None,
+        end_datetime: str | None,
+        workspace_id: str | None,
+        cursor: str | None,
+    ) -> _TranscriptPage:
+        """Fetch one page of transcripts from the Gong API.
+
+        Raises _CursorExpiredError if Gong reports the pagination cursor
+        expired (TTL is ~1 hour from first request in the pagination sequence).
+        """
+        body: dict[str, Any] = {"filter": {}}
        if start_datetime:
            body["filter"]["fromDateTime"] = start_datetime
        if end_datetime:
            body["filter"]["toDateTime"] = end_datetime
+        if workspace_id:
+            body["filter"]["workspaceId"] = workspace_id
+        if cursor:
+            body["cursor"] = cursor

-        # The batch_ids in the previous method appears to be batches of call_ids to process
-        # In this method, we will retrieve transcripts for them in batches.
-        transcripts: list[dict[str, Any]] = []
-        workspace_list = self.workspaces or [None]  # type: ignore
-        workspace_map = self._get_workspace_id_map() if self.workspaces else {}
+        response = self._throttled_request(
+            "POST", GongConnector.make_url("/v2/calls/transcript"), json=body
+        )
+        # If no calls in the range, return empty
+        if response.status_code == 404:
+            return _TranscriptPage(transcripts=[])

-        for workspace in workspace_list:
-            if workspace:
-                logger.info(f"Updating Gong workspace: {workspace}")
-                workspace_id = workspace_map.get(workspace)
-                if not workspace_id:
-                    logger.error(f"Invalid Gong workspace: {workspace}")
-                    if not self.continue_on_fail:
-                        raise ValueError(f"Invalid workspace: {workspace}")
-                    continue
-                body["filter"]["workspaceId"] = workspace_id
-            else:
-                if "workspaceId" in body["filter"]:
-                    del body["filter"]["workspaceId"]
+        if not response.ok:
+            # Cursor expiration comes back as a 4xx with this error message —
+            # detect it before raise_for_status so callers can restart the workspace.
+            if cursor and "cursor has expired" in response.text.lower():
+                raise _CursorExpiredError(response.text)
+            logger.error(f"Error fetching transcripts: {response.text}")
+            response.raise_for_status()

-            while True:
-                response = self._throttled_request(
-                    "POST", GongConnector.make_url("/v2/calls/transcript"), json=body
-                )
-                # If no calls in the range, just break out
-                if response.status_code == 404:
-                    break
+        data = response.json()
+        return _TranscriptPage(
+            transcripts=data.get("callTranscripts", []),
+            next_cursor=data.get("records", {}).get("cursor"),
+        )

-                try:
-                    response.raise_for_status()
-                except Exception:
-                    logger.error(f"Error fetching transcripts: {response.text}")
-                    raise
-
-                data = response.json()
-                call_transcripts = data.get("callTranscripts", [])
-                transcripts.extend(call_transcripts)
-
-                while len(transcripts) >= self.batch_size:
-                    yield transcripts[: self.batch_size]
-                    transcripts = transcripts[self.batch_size :]
-
-                cursor = data.get("records", {}).get("cursor")
-                if cursor:
-                    body["cursor"] = cursor
-                else:
-                    break
-
-        if transcripts:
-            yield transcripts
-
-    def _get_call_details_by_ids(self, call_ids: list[str]) -> dict:
+    def _get_call_details_by_ids(self, call_ids: list[str]) -> dict[str, Any]:
        body = {
            "filter": {"callIds": call_ids},
            "contentSelector": {"exposedFields": {"parties": True}},
@@ -176,6 +187,50 @@ class GongConnector(LoadConnector, PollConnector):

        return call_to_metadata

+    def _fetch_call_details_with_retry(self, call_ids: list[str]) -> dict[str, Any]:
+        """Fetch call details with retry for the Gong API race condition.
+
+        The Gong API has a known race where transcript call IDs don't immediately
+        appear in /v2/calls/extensive. Retries with exponential backoff, only
+        re-requesting the missing IDs on each attempt.
+        """
+        call_details_map = self._get_call_details_by_ids(call_ids)
+        if set(call_ids) == set(call_details_map.keys()):
+            return call_details_map
+
+        for attempt in range(2, self.MAX_CALL_DETAILS_ATTEMPTS + 1):
+            missing_ids = list(set(call_ids) - set(call_details_map.keys()))
+            logger.warning(
+                f"_get_call_details_by_ids is missing call id's: current_attempt={attempt - 1} missing_call_ids={missing_ids}"
+            )
+
+            wait_seconds = self.CALL_DETAILS_DELAY * pow(2, attempt - 2)
+            logger.warning(
+                f"_get_call_details_by_ids waiting to retry: "
+                f"wait={wait_seconds}s "
+                f"current_attempt={attempt - 1} "
+                f"next_attempt={attempt} "
+                f"max_attempts={self.MAX_CALL_DETAILS_ATTEMPTS}"
+            )
+            time.sleep(wait_seconds)
+
+            # Only re-fetch the missing IDs, merge into existing results
+            new_details = self._get_call_details_by_ids(missing_ids)
+            call_details_map.update(new_details)
+
+            if set(call_ids) == set(call_details_map.keys()):
+                return call_details_map
+
+        missing_ids = list(set(call_ids) - set(call_details_map.keys()))
+        logger.error(
+            f"Giving up on missing call id's after "
+            f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
+            f"missing_call_ids={missing_ids} — "
+            f"proceeding with {len(call_details_map)} of "
+            f"{len(call_ids)} calls"
+        )
+        return call_details_map
+
    @staticmethod
    def _parse_parties(parties: list[dict]) -> dict[str, str]:
        id_mapping = {}
@@ -196,186 +251,46 @@ class GongConnector(LoadConnector, PollConnector):

        return id_mapping

-    def _fetch_calls(
-        self, start_datetime: str | None = None, end_datetime: str | None = None
-    ) -> GenerateDocumentsOutput:
-        num_calls = 0
+    def _resolve_workspace_ids(self) -> list[str | None]:
+        """Resolve configured workspace names/IDs to actual workspace IDs.

-        for transcript_batch in self._get_transcript_batches(
-            start_datetime, end_datetime
-        ):
-            doc_batch: list[Document | HierarchyNode] = []
+        Returns a list of workspace IDs. If no workspaces are configured,
+        returns [None] to indicate "fetch all workspaces".

-            transcript_call_ids = cast(
-                list[str],
-                [t.get("callId") for t in transcript_batch if t.get("callId")],
+        Raises ValueError if workspaces are configured but none resolve —
+        we never silently widen scope to "fetch all" on misconfiguration,
+        because that could ingest an entire Gong account by mistake.
+        """
+        if not self.workspaces:
+            return [None]
+
+        workspace_map = self._get_workspace_id_map()
+        resolved: list[str | None] = []
+        for workspace in self.workspaces:
+            workspace_id = workspace_map.get(workspace)
+            if not workspace_id:
+                logger.error(f"Invalid Gong workspace: {workspace}")
+                continue
+            resolved.append(workspace_id)
+
+        if not resolved:
+            raise ValueError(
+                f"No valid Gong workspaces found — check workspace names/IDs in connector config. Configured: {self.workspaces}"
            )

-            call_details_map: dict[str, Any] = {}
+        return resolved

-            # There's a likely race condition in the API where a transcript will have a
-            # call id but the call to v2/calls/extensive will not return all of the id's
-            # retry with exponential backoff has been observed to mitigate this
-            # in ~2 minutes. After max attempts, proceed with whatever we have —
-            # the per-call loop below will skip missing IDs gracefully.
-            current_attempt = 0
-            while True:
-                current_attempt += 1
-                call_details_map = self._get_call_details_by_ids(transcript_call_ids)
-                if set(transcript_call_ids) == set(call_details_map.keys()):
-                    # we got all the id's we were expecting ... break and continue
-                    break
-
-                # we are missing some id's. Log and retry with exponential backoff
-                missing_call_ids = set(transcript_call_ids) - set(
-                    call_details_map.keys()
-                )
-                logger.warning(
-                    f"_get_call_details_by_ids is missing call id's: "
-                    f"current_attempt={current_attempt} "
-                    f"missing_call_ids={missing_call_ids}"
-                )
-                if current_attempt >= self.MAX_CALL_DETAILS_ATTEMPTS:
-                    logger.error(
-                        f"Giving up on missing call id's after "
-                        f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
-                        f"missing_call_ids={missing_call_ids} — "
-                        f"proceeding with {len(call_details_map)} of "
-                        f"{len(transcript_call_ids)} calls"
-                    )
-                    break
-
-                wait_seconds = self.CALL_DETAILS_DELAY * pow(2, current_attempt - 1)
-                logger.warning(
-                    f"_get_call_details_by_ids waiting to retry: "
-                    f"wait={wait_seconds}s "
-                    f"current_attempt={current_attempt} "
-                    f"next_attempt={current_attempt + 1} "
-                    f"max_attempts={self.MAX_CALL_DETAILS_ATTEMPTS}"
-                )
-                time.sleep(wait_seconds)
-
-            # now we can iterate per call/transcript
-            for transcript in transcript_batch:
-                call_id = transcript.get("callId")
-
-                if not call_id or call_id not in call_details_map:
-                    # NOTE(rkuo): seeing odd behavior where call_ids from the transcript
-                    # don't have call details. adding error debugging logs to trace.
-                    logger.error(
-                        f"Couldn't get call information for Call ID: {call_id}"
-                    )
-                    if call_id:
-                        logger.error(
-                            f"Call debug info: call_id={call_id} "
-                            f"call_ids={transcript_call_ids} "
-                            f"call_details_map={call_details_map.keys()}"
-                        )
-                    if not self.continue_on_fail:
-                        raise RuntimeError(
-                            f"Couldn't get call information for Call ID: {call_id}"
-                        )
-                    continue
-
-                call_details = call_details_map[call_id]
-                call_metadata = call_details["metaData"]
-
-                call_time_str = call_metadata["started"]
-                call_title = call_metadata["title"]
-                logger.info(
-                    f"{num_calls + 1}: Indexing Gong call id {call_id} from {call_time_str.split('T', 1)[0]}: {call_title}"
-                )
-
-                call_parties = cast(list[dict] | None, call_details.get("parties"))
-                if call_parties is None:
-                    logger.error(f"Couldn't get parties for Call ID: {call_id}")
-                    call_parties = []
-
-                id_to_name_map = self._parse_parties(call_parties)
-
-                # Keeping a separate dict here in case the parties info is incomplete
-                speaker_to_name: dict[str, str] = {}
-
-                transcript_text = ""
-                call_purpose = call_metadata["purpose"]
-                if call_purpose:
-                    transcript_text += f"Call Description: {call_purpose}\n\n"
-
-                contents = transcript["transcript"]
-                for segment in contents:
-                    speaker_id = segment.get("speakerId", "")
-                    if speaker_id not in speaker_to_name:
-                        if self.hide_user_info:
-                            speaker_to_name[speaker_id] = (
-                                f"User {len(speaker_to_name) + 1}"
-                            )
-                        else:
-                            speaker_to_name[speaker_id] = id_to_name_map.get(
-                                speaker_id, "Unknown"
-                            )
-
-                    speaker_name = speaker_to_name[speaker_id]
-
-                    sentences = segment.get("sentences", {})
-                    monolog = " ".join(
-                        [sentence.get("text", "") for sentence in sentences]
-                    )
-                    transcript_text += f"{speaker_name}: {monolog}\n\n"
-
-                metadata = {}
-                if call_metadata.get("system"):
-                    metadata["client"] = call_metadata.get("system")
-                # TODO calls have a clientUniqueId field, can pull that in later
-
-                doc_batch.append(
-                    Document(
-                        id=call_id,
-                        sections=[
-                            TextSection(link=call_metadata["url"], text=transcript_text)
-                        ],
-                        source=DocumentSource.GONG,
-                        # Should not ever be Untitled as a call cannot be made without a Title
-                        semantic_identifier=call_title or "Untitled",
-                        doc_updated_at=datetime.fromisoformat(call_time_str).astimezone(
-                            timezone.utc
-                        ),
-                        metadata={"client": call_metadata.get("system")},
-                    )
-                )
-
-                num_calls += 1
-
-            yield doc_batch
-
-        logger.info(f"_fetch_calls finished: num_calls={num_calls}")
-
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        combined = (
-            f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
-        )
-        self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
-            "utf-8"
-        )
-
-        if self.auth_token_basic is None:
-            raise ConnectorMissingCredentialError("Gong")
-
-        self._session.headers.update(
-            {"Authorization": f"Basic {self.auth_token_basic}"}
-        )
-        return None
-
-    def load_from_state(self) -> GenerateDocumentsOutput:
-        return self._fetch_calls()
-
-    def poll_source(
-        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
-    ) -> GenerateDocumentsOutput:
+    @staticmethod
+    def _compute_time_range(
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+    ) -> tuple[str, str]:
+        """Compute the start/end datetime strings for the Gong API filter,
+        applying GONG_CONNECTOR_START_TIME and the 1-day offset."""
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        # if this env variable is set, don't start from a timestamp before the specified
        # start time
-        # TODO: remove this once this is globally available
        if GONG_CONNECTOR_START_TIME:
            special_start_datetime = datetime.fromisoformat(GONG_CONNECTOR_START_TIME)
            special_start_datetime = special_start_datetime.replace(tzinfo=timezone.utc)
@@ -394,11 +309,186 @@ class GongConnector(LoadConnector, PollConnector):
        # so adding a 1 day buffer and fetching by default till current time
        start_one_day_offset = start_datetime - timedelta(days=1)
        start_time = start_one_day_offset.isoformat()
+        end_time = end_datetime.isoformat()

-        end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()
+        return start_time, end_time

-        logger.info(f"Fetching Gong calls between {start_time} and {end_time}")
-        return self._fetch_calls(start_time, end_time)
+    def _process_transcripts(
+        self,
+        transcripts: list[dict[str, Any]],
+    ) -> Generator[Document | ConnectorFailure, None, None]:
+        """Process a batch of transcripts into Documents or ConnectorFailures."""
+        transcript_call_ids = cast(
+            list[str],
+            [t.get("callId") for t in transcripts if t.get("callId")],
+        )
+
+        call_details_map = self._fetch_call_details_with_retry(transcript_call_ids)
+
+        for transcript in transcripts:
+            call_id = transcript.get("callId")
+
+            if not call_id or call_id not in call_details_map:
+                logger.error(f"Couldn't get call information for Call ID: {call_id}")
+                if call_id:
+                    logger.error(
+                        f"Call debug info: call_id={call_id} "
+                        f"call_ids={transcript_call_ids} "
+                        f"call_details_map={call_details_map.keys()}"
+                    )
+                yield ConnectorFailure(
+                    failed_document=DocumentFailure(
+                        document_id=call_id or "unknown",
+                    ),
+                    failure_message=f"Couldn't get call information for Call ID: {call_id}",
+                )
+                continue
+
+            call_details = call_details_map[call_id]
+            call_metadata = call_details["metaData"]
+
+            call_time_str = call_metadata["started"]
+            call_title = call_metadata["title"]
+            logger.info(
+                f"Indexing Gong call id {call_id} from {call_time_str.split('T', 1)[0]}: {call_title}"
+            )
+
+            call_parties = cast(list[dict] | None, call_details.get("parties"))
+            if call_parties is None:
+                logger.error(f"Couldn't get parties for Call ID: {call_id}")
+                call_parties = []
+
+            id_to_name_map = self._parse_parties(call_parties)
+
+            speaker_to_name: dict[str, str] = {}
+
+            transcript_text = ""
+            call_purpose = call_metadata["purpose"]
+            if call_purpose:
+                transcript_text += f"Call Description: {call_purpose}\n\n"
+
+            contents = transcript["transcript"]
+            for segment in contents:
+                speaker_id = segment.get("speakerId", "")
+                if speaker_id not in speaker_to_name:
+                    if self.hide_user_info:
+                        speaker_to_name[speaker_id] = f"User {len(speaker_to_name) + 1}"
+                    else:
+                        speaker_to_name[speaker_id] = id_to_name_map.get(
+                            speaker_id, "Unknown"
+                        )
+
+                speaker_name = speaker_to_name[speaker_id]
+
+                sentences = segment.get("sentences", {})
+                monolog = " ".join([sentence.get("text", "") for sentence in sentences])
+                transcript_text += f"{speaker_name}: {monolog}\n\n"
+
+            yield Document(
+                id=call_id,
+                sections=[TextSection(link=call_metadata["url"], text=transcript_text)],
+                source=DocumentSource.GONG,
+                semantic_identifier=call_title or "Untitled",
+                doc_updated_at=datetime.fromisoformat(call_time_str).astimezone(
+                    timezone.utc
+                ),
+                metadata={"client": call_metadata.get("system")},
+            )
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        combined = (
+            f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
+        )
+        self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
+            "utf-8"
+        )
+
+        if self.auth_token_basic is None:
+            raise ConnectorMissingCredentialError("Gong")
+
+        self._session.headers.update(
+            {"Authorization": f"Basic {self.auth_token_basic}"}
+        )
+        return None
+
+    def build_dummy_checkpoint(self) -> GongConnectorCheckpoint:
+        return GongConnectorCheckpoint(has_more=True)
+
+    def validate_checkpoint_json(self, checkpoint_json: str) -> GongConnectorCheckpoint:
+        return GongConnectorCheckpoint.model_validate_json(checkpoint_json)
+
+    def load_from_checkpoint(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: GongConnectorCheckpoint,
+    ) -> CheckpointOutput[GongConnectorCheckpoint]:
+        checkpoint = copy.deepcopy(checkpoint)
+
+        # Step 1: On the first call, resolve workspace IDs and cache the time range
+        if checkpoint.workspace_ids is None:
+            checkpoint.workspace_ids = self._resolve_workspace_ids()
+            checkpoint.time_range = self._compute_time_range(start, end)
+            checkpoint.has_more = True
+            return checkpoint
+
+        workspace_ids = checkpoint.workspace_ids
+
+        # If we've exhausted all workspaces, we're done
+        if checkpoint.workspace_index >= len(workspace_ids):
+            checkpoint.has_more = False
+            return checkpoint
+
+        # Use cached time range, falling back to computation if not cached
+        start_time, end_time = checkpoint.time_range or self._compute_time_range(
+            start, end
+        )
+        logger.info(
+            f"Fetching Gong calls between {start_time} and {end_time} "
+            f"(workspace {checkpoint.workspace_index + 1}/{len(workspace_ids)})"
+        )
+
+        workspace_id = workspace_ids[checkpoint.workspace_index]
+
+        # Step 2: Fetch one page of transcripts
+        try:
+            page = self._fetch_transcript_page(
+                start_datetime=start_time,
+                end_datetime=end_time,
+                workspace_id=workspace_id,
+                cursor=checkpoint.cursor,
+            )
+        except _CursorExpiredError:
+            # Gong cursors expire ~1h after the first request in the sequence. If the
+            # checkpoint paused long enough for the cursor to expire, restart
+            # the current workspace from the beginning of the time range.
+            # Document upserts are idempotent (keyed by call_id) so
+            # reprocessing is safe.
+            logger.warning(
+                f"Gong pagination cursor expired for workspace "
+                f"{checkpoint.workspace_index + 1}/{len(workspace_ids)}; "
+                f"restarting workspace from beginning of time range."
+            )
+            checkpoint.cursor = None
+            checkpoint.has_more = True
+            return checkpoint
+
+        # Step 3: Process transcripts into documents
+        if page.transcripts:
+            yield from self._process_transcripts(page.transcripts)
+
+        # Step 4: Update checkpoint state
+        if page.next_cursor:
+            # More pages in this workspace
+            checkpoint.cursor = page.next_cursor
+            checkpoint.has_more = True
+        else:
+            # This workspace is exhausted — advance to next
+            checkpoint.workspace_index += 1
+            checkpoint.cursor = None
+            checkpoint.has_more = checkpoint.workspace_index < len(workspace_ids)
+
+        return checkpoint


 if __name__ == "__main__":
@@ -412,5 +502,13 @@ if __name__ == "__main__":
        }
    )

-    latest_docs = connector.load_from_state()
-    print(next(latest_docs))
+    checkpoint = connector.build_dummy_checkpoint()
+    while checkpoint.has_more:
+        doc_generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
+        try:
+            while True:
+                item = next(doc_generator)
+                print(item)
+        except StopIteration as e:
+            checkpoint = e.value
+            print(f"Checkpoint: {checkpoint}")
--- a/backend/onyx/db/connector_credential_pair.py
+++ b/backend/onyx/db/connector_credential_pair.py
@@ -750,31 +750,3 @@ def resync_cc_pair(
    )

    db_session.commit()
-
-
-# ── Metrics query helpers ──────────────────────────────────────────────
-
-
-def get_connector_health_for_metrics(
-    db_session: Session,
-) -> list:  # Returns list of Row tuples
-    """Return connector health data for Prometheus metrics.
-
-    Each row is (cc_pair_id, status, in_repeated_error_state,
-    last_successful_index_time, name, source).
-    """
-    return (
-        db_session.query(
-            ConnectorCredentialPair.id,
-            ConnectorCredentialPair.status,
-            ConnectorCredentialPair.in_repeated_error_state,
-            ConnectorCredentialPair.last_successful_index_time,
-            ConnectorCredentialPair.name,
-            Connector.source,
-        )
-        .join(
-            Connector,
-            ConnectorCredentialPair.connector_id == Connector.id,
-        )
-        .all()
-    )
--- a/backend/onyx/db/index_attempt.py
+++ b/backend/onyx/db/index_attempt.py
@@ -2,8 +2,6 @@ from collections.abc import Sequence
 from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
-from typing import NamedTuple
-from typing import TYPE_CHECKING
 from typing import TypeVarTuple

 from sqlalchemy import and_
@@ -30,17 +28,6 @@ from onyx.utils.logger import setup_logger
 from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType

-if TYPE_CHECKING:
-    from onyx.configs.constants import DocumentSource
-
-# from sqlalchemy.sql.selectable import Select
-
-# Comment out unused imports that cause mypy errors
-# from onyx.auth.models import UserRole
-# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS
-# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier
-# from onyx.db.engine import async_query_for_dms
-
 logger = setup_logger()


@@ -977,106 +964,3 @@ def get_index_attempt_errors_for_cc_pair(
        stmt = stmt.offset(page * page_size).limit(page_size)

    return list(db_session.scalars(stmt).all())
-
-
-# ── Metrics query helpers ──────────────────────────────────────────────
-
-
-class ActiveIndexAttemptMetric(NamedTuple):
-    """Row returned by get_active_index_attempts_for_metrics."""
-
-    status: IndexingStatus
-    source: "DocumentSource"
-    cc_pair_id: int
-    cc_pair_name: str | None
-    attempt_count: int
-
-
-def get_active_index_attempts_for_metrics(
-    db_session: Session,
-) -> list[ActiveIndexAttemptMetric]:
-    """Return non-terminal index attempts grouped by status, source, and connector.
-
-    Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).
-    """
-    from onyx.db.models import Connector
-
-    terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
-    rows = (
-        db_session.query(
-            IndexAttempt.status,
-            Connector.source,
-            ConnectorCredentialPair.id,
-            ConnectorCredentialPair.name,
-            func.count(),
-        )
-        .join(
-            ConnectorCredentialPair,
-            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
-        )
-        .join(
-            Connector,
-            ConnectorCredentialPair.connector_id == Connector.id,
-        )
-        .filter(IndexAttempt.status.notin_(terminal_statuses))
-        .group_by(
-            IndexAttempt.status,
-            Connector.source,
-            ConnectorCredentialPair.id,
-            ConnectorCredentialPair.name,
-        )
-        .all()
-    )
-    return [ActiveIndexAttemptMetric(*row) for row in rows]
-
-
-def get_failed_attempt_counts_by_cc_pair(
-    db_session: Session,
-    since: datetime | None = None,
-) -> dict[int, int]:
-    """Return {cc_pair_id: failed_attempt_count} for all connectors.
-
-    When ``since`` is provided, only attempts created after that timestamp
-    are counted. Defaults to the last 90 days to avoid unbounded historical
-    aggregation.
-    """
-    if since is None:
-        since = datetime.now(timezone.utc) - timedelta(days=90)
-
-    rows = (
-        db_session.query(
-            IndexAttempt.connector_credential_pair_id,
-            func.count(),
-        )
-        .filter(IndexAttempt.status == IndexingStatus.FAILED)
-        .filter(IndexAttempt.time_created >= since)
-        .group_by(IndexAttempt.connector_credential_pair_id)
-        .all()
-    )
-    return {cc_id: count for cc_id, count in rows}
-
-
-def get_docs_indexed_by_cc_pair(
-    db_session: Session,
-    since: datetime | None = None,
-) -> dict[int, int]:
-    """Return {cc_pair_id: total_new_docs_indexed} across successful attempts.
-
-    Only counts attempts with status SUCCESS to avoid inflating counts with
-    partial results from failed attempts. When ``since`` is provided, only
-    attempts created after that timestamp are included.
-    """
-    if since is None:
-        since = datetime.now(timezone.utc) - timedelta(days=90)
-
-    query = (
-        db_session.query(
-            IndexAttempt.connector_credential_pair_id,
-            func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),
-        )
-        .filter(IndexAttempt.status == IndexingStatus.SUCCESS)
-        .filter(IndexAttempt.time_created >= since)
-        .group_by(IndexAttempt.connector_credential_pair_id)
-    )
-    rows = query.all()
-    return {cc_id: int(total or 0) for cc_id, total in rows}
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -23,6 +23,7 @@ import openpyxl
 from openpyxl.worksheet.worksheet import Worksheet
 from PIL import Image

+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
 from onyx.configs.constants import ONYX_METADATA_FILENAME
 from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
 from onyx.file_processing.file_types import OnyxFileExtensions
@@ -191,6 +192,56 @@ def read_text_file(
    return file_content_raw, metadata


+def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
+    """Return the number of embedded images in a PDF, short-circuiting at cap+1.
+
+    Used to reject PDFs whose image count would OOM the user-file-processing
+    worker during indexing. Returns a value > cap as a sentinel once the count
+    exceeds the cap, so callers do not iterate thousands of image objects just
+    to report a number. Returns 0 if the PDF cannot be parsed.
+
+    Owner-password-only PDFs (permission restrictions but no open password) are
+    counted normally — they decrypt with an empty string. Truly password-locked
+    PDFs are skipped (return 0) since we can't inspect them; the caller should
+    ensure the password-protected check runs first.
+
+    Always restores the file pointer to its original position before returning.
+    """
+    from pypdf import PdfReader
+
+    try:
+        start_pos = file.tell()
+    except Exception:
+        start_pos = None
+    try:
+        if start_pos is not None:
+            file.seek(0)
+        reader = PdfReader(file)
+        if reader.is_encrypted:
+            # Try empty password first (owner-password-only PDFs); give up if that fails.
+            try:
+                if reader.decrypt("") == 0:
+                    return 0
+            except Exception:
+                return 0
+        count = 0
+        for page in reader.pages:
+            for _ in page.images:
+                count += 1
+                if count > cap:
+                    return count
+        return count
+    except Exception:
+        logger.warning("Failed to count embedded images in PDF", exc_info=True)
+        return 0
+    finally:
+        if start_pos is not None:
+            try:
+                file.seek(start_pos)
+            except Exception:
+                pass
+
+
 def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
    """
    Extract text from a PDF. For embedded images, a more complex approach is needed.
@@ -254,8 +305,27 @@ def read_pdf_file(
        )

        if extract_images:
+            image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+            images_processed = 0
+            cap_reached = False
            for page_num, page in enumerate(pdf_reader.pages):
+                if cap_reached:
+                    break
                for image_file_object in page.images:
+                    if images_processed >= image_cap:
+                        # Defense-in-depth backstop. Upload-time validation
+                        # should have rejected files exceeding the cap, but
+                        # we also break here so a single oversized file can
+                        # never pin a worker.
+                        logger.warning(
+                            "PDF embedded image cap reached (%d). "
+                            "Skipping remaining images on page %d and beyond.",
+                            image_cap,
+                            page_num + 1,
+                        )
+                        cap_reached = True
+                        break
+
                    image = Image.open(io.BytesIO(image_file_object.data))
                    img_byte_arr = io.BytesIO()
                    image.save(img_byte_arr, format=image.format)
@@ -268,6 +338,7 @@ def read_pdf_file(
                        image_callback(img_bytes, image_name)
                    else:
                        extracted_images.append((img_bytes, image_name))
+                    images_processed += 1

        return text, metadata, extracted_images

--- a/backend/onyx/server/features/build/api/user_library.py
+++ b/backend/onyx/server/features/build/api/user_library.py
@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session

 from onyx.auth.permissions import require_permission
 from onyx.background.celery.versioned_apps.client import app as celery_app
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -51,6 +53,9 @@ from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import Permission
 from onyx.db.models import User
 from onyx.document_index.interfaces import DocumentMetadata
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
 from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
@@ -128,6 +133,49 @@ class DeleteFileResponse(BaseModel):
 # =============================================================================


+def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
+    """True if either the filename or the content-type indicates a PDF.
+
+    Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
+    ``Content-Type: application/octet-stream``), so we also fall back to
+    extension-based detection via ``mimetypes.guess_type`` on the filename.
+    """
+    if content_type == "application/pdf":
+        return True
+    guessed, _ = mimetypes.guess_type(filename)
+    return guessed == "application/pdf"
+
+
+def _check_pdf_image_caps(
+    filename: str, content: bytes, content_type: str | None, batch_total: int
+) -> int:
+    """Enforce per-file and per-batch embedded-image caps for PDFs.
+
+    Returns the number of embedded images in this file (0 for non-PDFs) so
+    callers can update their running batch total. Raises OnyxError(INVALID_INPUT)
+    if either cap is exceeded.
+    """
+    if not _looks_like_pdf(filename, content_type):
+        return 0
+    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
+    # Short-circuit at the larger cap so we get a useful count for both checks.
+    count = count_pdf_embedded_images(BytesIO(content), max(file_cap, batch_cap))
+    if count > file_cap:
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            f"PDF '{filename}' contains too many embedded images "
+            f"(more than {file_cap}). Try splitting the document into smaller files.",
+        )
+    if batch_total + count > batch_cap:
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            f"Upload would exceed the {batch_cap}-image limit across all "
+            f"files in this batch. Try uploading fewer image-heavy files at once.",
+        )
+    return count
+
+
 def _sanitize_path(path: str) -> str:
    """Sanitize a file path, removing traversal attempts and normalizing.

@@ -356,6 +404,7 @@ async def upload_files(

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
+    batch_image_total = 0
    now = datetime.now(timezone.utc)

    # Sanitize the base path
@@ -375,6 +424,14 @@ async def upload_files(
                detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB",
            )

+        # Reject PDFs with an unreasonable per-file or per-batch image count
+        batch_image_total += _check_pdf_image_caps(
+            filename=file.filename or "unnamed",
+            content=content,
+            content_type=file.content_type,
+            batch_total=batch_image_total,
+        )
+
        # Validate cumulative storage (existing + this upload batch)
        total_size += file_size
        if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
@@ -473,6 +530,7 @@ async def upload_zip(

    uploaded_entries: list[LibraryEntryResponse] = []
    total_size = 0
+    batch_image_total = 0

    # Extract zip contents into a subfolder named after the zip file
    zip_name = api_sanitize_filename(file.filename or "upload")
@@ -511,6 +569,36 @@ async def upload_zip(
                    logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
                    continue

+                # Skip PDFs that would trip the per-file or per-batch image
+                # cap (would OOM the user-file-processing worker). Matches
+                # /upload behavior but uses skip-and-warn to stay consistent
+                # with the zip path's handling of oversized files.
+                zip_file_name = zip_info.filename.split("/")[-1]
+                zip_content_type, _ = mimetypes.guess_type(zip_file_name)
+                if zip_content_type == "application/pdf":
+                    image_count = count_pdf_embedded_images(
+                        BytesIO(file_content),
+                        max(
+                            MAX_EMBEDDED_IMAGES_PER_FILE,
+                            MAX_EMBEDDED_IMAGES_PER_UPLOAD,
+                        ),
+                    )
+                    if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
+                        logger.warning(
+                            "Skipping '%s' - exceeds %d per-file embedded-image cap",
+                            zip_info.filename,
+                            MAX_EMBEDDED_IMAGES_PER_FILE,
+                        )
+                        continue
+                    if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
+                        logger.warning(
+                            "Skipping '%s' - would exceed %d per-batch embedded-image cap",
+                            zip_info.filename,
+                            MAX_EMBEDDED_IMAGES_PER_UPLOAD,
+                        )
+                        continue
+                    batch_image_total += image_count
+
                total_size += file_size

                # Validate cumulative storage
--- a/backend/onyx/server/features/projects/projects_file_utils.py
+++ b/backend/onyx/server/features/projects/projects_file_utils.py
@@ -9,7 +9,10 @@ from pydantic import ConfigDict
 from pydantic import Field
 from sqlalchemy.orm import Session

+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
+from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
 from onyx.db.llm import fetch_default_llm_model
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_processing.extract_file_text import get_file_ext
 from onyx.file_processing.file_types import OnyxFileExtensions
@@ -190,6 +193,11 @@ def categorize_uploaded_files(
        token_threshold_k * 1000 if token_threshold_k else None
    )  # 0 → None = no limit

+    # Running total of embedded images across PDFs in this batch. Once the
+    # aggregate cap is reached, subsequent PDFs in the same upload are
+    # rejected even if they'd individually fit under MAX_EMBEDDED_IMAGES_PER_FILE.
+    batch_image_total = 0
+
    for upload in files:
        try:
            filename = get_safe_filename(upload)
@@ -252,6 +260,47 @@ def categorize_uploaded_files(
                    )
                    continue

+                # Reject PDFs with an unreasonable number of embedded images
+                # (either per-file or accumulated across this upload batch).
+                # A PDF with thousands of embedded images can OOM the
+                # user-file-processing celery worker because every image is
+                # decoded with PIL and then sent to the vision LLM.
+                if extension == ".pdf":
+                    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
+                    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
+                    # Use the larger of the two caps as the short-circuit
+                    # threshold so we get a useful count for both checks.
+                    # count_pdf_embedded_images restores the stream position.
+                    count = count_pdf_embedded_images(
+                        upload.file, max(file_cap, batch_cap)
+                    )
+                    if count > file_cap:
+                        results.rejected.append(
+                            RejectedFile(
+                                filename=filename,
+                                reason=(
+                                    f"PDF contains too many embedded images "
+                                    f"(more than {file_cap}). Try splitting "
+                                    f"the document into smaller files."
+                                ),
+                            )
+                        )
+                        continue
+                    if batch_image_total + count > batch_cap:
+                        results.rejected.append(
+                            RejectedFile(
+                                filename=filename,
+                                reason=(
+                                    f"Upload would exceed the "
+                                    f"{batch_cap}-image limit across all "
+                                    f"files in this batch. Try uploading "
+                                    f"fewer image-heavy files at once."
+                                ),
+                            )
+                        )
+                        continue
+                    batch_image_total += count
+
                text_content = extract_file_text(
                    file=upload.file,
                    file_name=filename,
--- a/backend/onyx/server/metrics/celery_task_metrics.py
+++ b/backend/onyx/server/metrics/celery_task_metrics.py
@@ -1,7 +1,8 @@
 """Generic Celery task lifecycle Prometheus metrics.

 Provides signal handlers that track task started/completed/failed counts,
-active task gauge, task duration histograms, and retry/reject/revoke counts.
+active task gauge, task duration histograms, queue wait time histograms,
+and retry/reject/revoke counts.
 These fire for ALL tasks on the worker — no per-connector enrichment
 (see indexing_task_metrics.py for that).

@@ -71,6 +72,32 @@ TASK_REJECTED = Counter(
    ["task_name"],
 )

+TASK_QUEUE_WAIT = Histogram(
+    "onyx_celery_task_queue_wait_seconds",
+    "Time a Celery task spent waiting in the queue before execution started",
+    ["task_name", "queue"],
+    buckets=[
+        0.1,
+        0.5,
+        1,
+        5,
+        30,
+        60,
+        300,
+        600,
+        1800,
+        3600,
+        7200,
+        14400,
+        28800,
+        43200,
+        86400,
+        172800,
+        432000,
+        864000,
+    ],
+)
+
 # task_id → (monotonic start time, metric labels)
 _task_start_times: dict[str, tuple[float, dict[str, str]]] = {}

@@ -133,6 +160,13 @@ def on_celery_task_prerun(
        with _task_start_times_lock:
            _evict_stale_start_times()
            _task_start_times[task_id] = (time.monotonic(), labels)
+
+        headers = getattr(task.request, "headers", None) or {}
+        enqueued_at = headers.get("enqueued_at")
+        if isinstance(enqueued_at, (int, float)):
+            TASK_QUEUE_WAIT.labels(**labels).observe(
+                max(0.0, time.time() - enqueued_at)
+            )
    except Exception:
        logger.debug("Failed to record celery task prerun metrics", exc_info=True)

--- a/backend/onyx/server/metrics/connector_health_metrics.py
+++ b/backend/onyx/server/metrics/connector_health_metrics.py
@@ -0,0 +1,123 @@
+"""Prometheus metrics for connector health and index attempts.
+
+Emitted by docfetching and docprocessing workers when connector or
+index attempt state changes. All functions silently catch exceptions
+to avoid disrupting the caller's business logic.
+
+Gauge metrics (error state, last success timestamp) are per-process.
+With multiple worker pods, use max() aggregation in PromQL to get the
+correct value across instances, e.g.:
+    max by (cc_pair_id, connector_name) (onyx_connector_in_error_state)
+
+Unlike the per-task counters in indexing_task_metrics.py, these metrics
+include connector_name because their cardinality is bounded by the number
+of connectors (one series per connector), not by the number of task
+executions.
+"""
+
+from prometheus_client import Counter
+from prometheus_client import Gauge
+
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+_CONNECTOR_LABELS = ["tenant_id", "source", "cc_pair_id", "connector_name"]
+
+# --- Index attempt lifecycle ---
+
+INDEX_ATTEMPT_STATUS = Counter(
+    "onyx_index_attempt_transitions_total",
+    "Index attempt status transitions",
+    [*_CONNECTOR_LABELS, "status"],
+)
+
+# --- Connector health ---
+
+CONNECTOR_IN_ERROR_STATE = Gauge(
+    "onyx_connector_in_error_state",
+    "Whether the connector is in a repeated error state (1=yes, 0=no)",
+    _CONNECTOR_LABELS,
+)
+
+CONNECTOR_LAST_SUCCESS_TIMESTAMP = Gauge(
+    "onyx_connector_last_success_timestamp_seconds",
+    "Unix timestamp of last successful indexing for this connector",
+    _CONNECTOR_LABELS,
+)
+
+CONNECTOR_DOCS_INDEXED = Counter(
+    "onyx_connector_docs_indexed_total",
+    "Total documents indexed per connector (monotonic)",
+    _CONNECTOR_LABELS,
+)
+
+CONNECTOR_INDEXING_ERRORS = Counter(
+    "onyx_connector_indexing_errors_total",
+    "Total failed index attempts per connector (monotonic)",
+    _CONNECTOR_LABELS,
+)
+
+
+def on_index_attempt_status_change(
+    tenant_id: str,
+    source: str,
+    cc_pair_id: int,
+    connector_name: str,
+    status: str,
+) -> None:
+    """Called on any index attempt status transition."""
+    try:
+        labels = {
+            "tenant_id": tenant_id,
+            "source": source,
+            "cc_pair_id": str(cc_pair_id),
+            "connector_name": connector_name,
+        }
+        INDEX_ATTEMPT_STATUS.labels(**labels, status=status).inc()
+        if status == "failed":
+            CONNECTOR_INDEXING_ERRORS.labels(**labels).inc()
+    except Exception:
+        logger.debug("Failed to record index attempt status metric", exc_info=True)
+
+
+def on_connector_error_state_change(
+    tenant_id: str,
+    source: str,
+    cc_pair_id: int,
+    connector_name: str,
+    in_error: bool,
+) -> None:
+    """Called when a connector's in_repeated_error_state changes."""
+    try:
+        CONNECTOR_IN_ERROR_STATE.labels(
+            tenant_id=tenant_id,
+            source=source,
+            cc_pair_id=str(cc_pair_id),
+            connector_name=connector_name,
+        ).set(1.0 if in_error else 0.0)
+    except Exception:
+        logger.debug("Failed to record connector error state metric", exc_info=True)
+
+
+def on_connector_indexing_success(
+    tenant_id: str,
+    source: str,
+    cc_pair_id: int,
+    connector_name: str,
+    docs_indexed: int,
+    success_timestamp: float,
+) -> None:
+    """Called when an indexing run completes successfully."""
+    try:
+        labels = {
+            "tenant_id": tenant_id,
+            "source": source,
+            "cc_pair_id": str(cc_pair_id),
+            "connector_name": connector_name,
+        }
+        CONNECTOR_LAST_SUCCESS_TIMESTAMP.labels(**labels).set(success_timestamp)
+        if docs_indexed > 0:
+            CONNECTOR_DOCS_INDEXED.labels(**labels).inc(docs_indexed)
+    except Exception:
+        logger.debug("Failed to record connector success metric", exc_info=True)
--- a/backend/onyx/server/metrics/deletion_metrics.py
+++ b/backend/onyx/server/metrics/deletion_metrics.py
@@ -0,0 +1,104 @@
+"""Connector-deletion-specific Prometheus metrics.
+
+Tracks the deletion lifecycle:
+  1. Deletions started (taskset generated)
+  2. Deletions completed (success or failure)
+  3. Taskset duration (from taskset generation to completion or failure).
+     Note: this measures the most recent taskset execution, NOT wall-clock
+     time since the user triggered the deletion. When deletion is blocked by
+     indexing/pruning/permissions, the fence is cleared and a fresh taskset
+     is generated on each retry, resetting this timer.
+  4. Deletion blocked by dependencies (indexing, pruning, permissions, etc.)
+  5. Fence resets (stuck deletion recovery)
+
+All metrics are labeled by tenant_id. cc_pair_id is intentionally excluded
+to avoid unbounded cardinality.
+
+Usage:
+    from onyx.server.metrics.deletion_metrics import (
+        inc_deletion_started,
+        inc_deletion_completed,
+        observe_deletion_taskset_duration,
+        inc_deletion_blocked,
+        inc_deletion_fence_reset,
+    )
+"""
+
+from prometheus_client import Counter
+from prometheus_client import Histogram
+
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+DELETION_STARTED = Counter(
+    "onyx_deletion_started_total",
+    "Connector deletions initiated (taskset generated)",
+    ["tenant_id"],
+)
+
+DELETION_COMPLETED = Counter(
+    "onyx_deletion_completed_total",
+    "Connector deletions completed",
+    ["tenant_id", "outcome"],
+)
+
+DELETION_TASKSET_DURATION = Histogram(
+    "onyx_deletion_taskset_duration_seconds",
+    "Duration of a connector deletion taskset, from taskset generation "
+    "to completion or failure. Does not include time spent blocked on "
+    "indexing/pruning/permissions before the taskset was generated.",
+    ["tenant_id", "outcome"],
+    buckets=[10, 30, 60, 120, 300, 600, 1800, 3600, 7200, 21600],
+)
+
+DELETION_BLOCKED = Counter(
+    "onyx_deletion_blocked_total",
+    "Times deletion was blocked by a dependency",
+    ["tenant_id", "blocker"],
+)
+
+DELETION_FENCE_RESET = Counter(
+    "onyx_deletion_fence_reset_total",
+    "Deletion fences reset due to missing celery tasks",
+    ["tenant_id"],
+)
+
+
+def inc_deletion_started(tenant_id: str) -> None:
+    try:
+        DELETION_STARTED.labels(tenant_id=tenant_id).inc()
+    except Exception:
+        logger.debug("Failed to record deletion started", exc_info=True)
+
+
+def inc_deletion_completed(tenant_id: str, outcome: str) -> None:
+    try:
+        DELETION_COMPLETED.labels(tenant_id=tenant_id, outcome=outcome).inc()
+    except Exception:
+        logger.debug("Failed to record deletion completed", exc_info=True)
+
+
+def observe_deletion_taskset_duration(
+    tenant_id: str, outcome: str, duration_seconds: float
+) -> None:
+    try:
+        DELETION_TASKSET_DURATION.labels(tenant_id=tenant_id, outcome=outcome).observe(
+            duration_seconds
+        )
+    except Exception:
+        logger.debug("Failed to record deletion taskset duration", exc_info=True)
+
+
+def inc_deletion_blocked(tenant_id: str, blocker: str) -> None:
+    try:
+        DELETION_BLOCKED.labels(tenant_id=tenant_id, blocker=blocker).inc()
+    except Exception:
+        logger.debug("Failed to record deletion blocked", exc_info=True)
+
+
+def inc_deletion_fence_reset(tenant_id: str) -> None:
+    try:
+        DELETION_FENCE_RESET.labels(tenant_id=tenant_id).inc()
+    except Exception:
+        logger.debug("Failed to record deletion fence reset", exc_info=True)
--- a/backend/onyx/server/metrics/indexing_pipeline.py
+++ b/backend/onyx/server/metrics/indexing_pipeline.py
@@ -1,25 +1,30 @@
-"""Prometheus collectors for Celery queue depths and indexing pipeline state.
+"""Prometheus collectors for Celery queue depths and infrastructure health.

-These collectors query Redis and Postgres at scrape time (the Collector pattern),
+These collectors query Redis at scrape time (the Collector pattern),
 so metrics are always fresh when Prometheus scrapes /metrics. They run inside the
-monitoring celery worker which already has Redis and DB access.
+monitoring celery worker which already has Redis access.

-To avoid hammering Redis/Postgres on every 15s scrape, results are cached with
+To avoid hammering Redis on every 15s scrape, results are cached with
 a configurable TTL (default 30s). This means metrics may be up to TTL seconds
 stale, which is fine for monitoring dashboards.
+
+Note: connector health and index attempt metrics are push-based (emitted by
+workers at state-change time) and live in connector_health_metrics.py.
 """

+from __future__ import annotations
+
+import concurrent.futures
 import json
 import threading
 import time
-from datetime import datetime
-from datetime import timezone
 from typing import Any

 from prometheus_client.core import GaugeMetricFamily
 from prometheus_client.registry import Collector
 from redis import Redis

+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.configs.constants import OnyxCeleryQueues
@@ -31,6 +36,11 @@ logger = setup_logger()
 # the previous result without re-querying Redis/Postgres.
 _DEFAULT_CACHE_TTL = 30.0

+# Maximum time (seconds) a single _collect_fresh() call may take before
+# the collector gives up and returns stale/empty results. Prevents the
+# /metrics endpoint from hanging indefinitely when a Redis query stalls.
+_DEFAULT_COLLECT_TIMEOUT = 120.0
+
 _QUEUE_LABEL_MAP: dict[str, str] = {
    OnyxCeleryQueues.PRIMARY: "primary",
    OnyxCeleryQueues.DOCPROCESSING: "docprocessing",
@@ -62,18 +72,32 @@ _UNACKED_QUEUES: list[str] = [


 class _CachedCollector(Collector):
-    """Base collector with TTL-based caching.
+    """Base collector with TTL-based caching and timeout protection.

    Subclasses implement ``_collect_fresh()`` to query the actual data source.
    The base ``collect()`` returns cached results if the TTL hasn't expired,
    avoiding repeated queries when Prometheus scrapes frequently.
+
+    A per-collection timeout prevents a slow Redis query from blocking the
+    /metrics endpoint indefinitely. If _collect_fresh() exceeds the timeout,
+    the stale cache (or an empty list if nothing is cached) is returned.
    """

-    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
+    def __init__(
+        self,
+        cache_ttl: float = _DEFAULT_CACHE_TTL,
+        collect_timeout: float = _DEFAULT_COLLECT_TIMEOUT,
+    ) -> None:
        self._cache_ttl = cache_ttl
+        self._collect_timeout = collect_timeout
        self._cached_result: list[GaugeMetricFamily] | None = None
        self._last_collect_time: float = 0.0
        self._lock = threading.Lock()
+        self._executor = concurrent.futures.ThreadPoolExecutor(
+            max_workers=1,
+            thread_name_prefix=type(self).__name__,
+        )
+        self._inflight: concurrent.futures.Future | None = None

    def collect(self) -> list[GaugeMetricFamily]:
        with self._lock:
@@ -84,12 +108,28 @@ class _CachedCollector(Collector):
            ):
                return self._cached_result

+            # If a previous _collect_fresh() is still running, wait on it
+            # rather than queuing another. This prevents unbounded task
+            # accumulation in the executor during extended Redis outages.
+            if self._inflight is not None and not self._inflight.done():
+                future = self._inflight
+            else:
+                future = self._executor.submit(self._collect_fresh)
+                self._inflight = future
+
            try:
-                result = self._collect_fresh()
+                result = future.result(timeout=self._collect_timeout)
+                self._inflight = None
                self._cached_result = result
                self._last_collect_time = now
                return result
+            except concurrent.futures.TimeoutError:
+                logger.warning(
+                    f"{type(self).__name__}._collect_fresh() timed out after {self._collect_timeout}s, returning stale cache"
+                )
+                return self._cached_result if self._cached_result is not None else []
            except Exception:
+                self._inflight = None
                logger.exception(f"Error in {type(self).__name__}.collect()")
                # Return stale cache on error rather than nothing — avoids
                # metrics disappearing during transient failures.
@@ -117,8 +157,6 @@ class QueueDepthCollector(_CachedCollector):
        if self._celery_app is None:
            return []

-        from onyx.background.celery.celery_redis import celery_get_broker_client
-
        redis_client = celery_get_broker_client(self._celery_app)

        depth = GaugeMetricFamily(
@@ -194,208 +232,6 @@ class QueueDepthCollector(_CachedCollector):
        return None


-class IndexAttemptCollector(_CachedCollector):
-    """Queries Postgres for index attempt state on each scrape."""
-
-    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
-        super().__init__(cache_ttl)
-        self._configured: bool = False
-        self._terminal_statuses: list = []
-
-    def configure(self) -> None:
-        """Call once DB engine is initialized."""
-        from onyx.db.enums import IndexingStatus
-
-        self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
-        self._configured = True
-
-    def _collect_fresh(self) -> list[GaugeMetricFamily]:
-        if not self._configured:
-            return []
-
-        from onyx.db.engine.sql_engine import get_session_with_current_tenant
-        from onyx.db.engine.tenant_utils import get_all_tenant_ids
-        from onyx.db.index_attempt import get_active_index_attempts_for_metrics
-        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
-
-        attempts_gauge = GaugeMetricFamily(
-            "onyx_index_attempts_active",
-            "Number of non-terminal index attempts",
-            labels=[
-                "status",
-                "source",
-                "tenant_id",
-                "connector_name",
-                "cc_pair_id",
-            ],
-        )
-
-        tenant_ids = get_all_tenant_ids()
-
-        for tid in tenant_ids:
-            # Defensive guard — get_all_tenant_ids() should never yield None,
-            # but we guard here for API stability in case the contract changes.
-            if tid is None:
-                continue
-            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
-            try:
-                with get_session_with_current_tenant() as session:
-                    rows = get_active_index_attempts_for_metrics(session)
-
-                    for status, source, cc_id, cc_name, count in rows:
-                        name_val = cc_name or f"cc_pair_{cc_id}"
-                        attempts_gauge.add_metric(
-                            [
-                                status.value,
-                                source.value,
-                                tid,
-                                name_val,
-                                str(cc_id),
-                            ],
-                            count,
-                        )
-            finally:
-                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
-
-        return [attempts_gauge]
-
-
-class ConnectorHealthCollector(_CachedCollector):
-    """Queries Postgres for connector health state on each scrape."""
-
-    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
-        super().__init__(cache_ttl)
-        self._configured: bool = False
-
-    def configure(self) -> None:
-        """Call once DB engine is initialized."""
-        self._configured = True
-
-    def _collect_fresh(self) -> list[GaugeMetricFamily]:
-        if not self._configured:
-            return []
-
-        from onyx.db.connector_credential_pair import (
-            get_connector_health_for_metrics,
-        )
-        from onyx.db.engine.sql_engine import get_session_with_current_tenant
-        from onyx.db.engine.tenant_utils import get_all_tenant_ids
-        from onyx.db.index_attempt import get_docs_indexed_by_cc_pair
-        from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair
-        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
-
-        staleness_gauge = GaugeMetricFamily(
-            "onyx_connector_last_success_age_seconds",
-            "Seconds since last successful index for this connector",
-            labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
-        )
-        error_state_gauge = GaugeMetricFamily(
-            "onyx_connector_in_error_state",
-            "Whether the connector is in a repeated error state (1=yes, 0=no)",
-            labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
-        )
-        by_status_gauge = GaugeMetricFamily(
-            "onyx_connectors_by_status",
-            "Number of connectors grouped by status",
-            labels=["tenant_id", "status"],
-        )
-        error_total_gauge = GaugeMetricFamily(
-            "onyx_connectors_in_error_total",
-            "Total number of connectors in repeated error state",
-            labels=["tenant_id"],
-        )
-        per_connector_labels = [
-            "tenant_id",
-            "source",
-            "cc_pair_id",
-            "connector_name",
-        ]
-        docs_success_gauge = GaugeMetricFamily(
-            "onyx_connector_docs_indexed",
-            "Total new documents indexed (90-day rolling sum) per connector",
-            labels=per_connector_labels,
-        )
-        docs_error_gauge = GaugeMetricFamily(
-            "onyx_connector_error_count",
-            "Total number of failed index attempts per connector",
-            labels=per_connector_labels,
-        )
-
-        now = datetime.now(tz=timezone.utc)
-        tenant_ids = get_all_tenant_ids()
-
-        for tid in tenant_ids:
-            # Defensive guard — get_all_tenant_ids() should never yield None,
-            # but we guard here for API stability in case the contract changes.
-            if tid is None:
-                continue
-            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
-            try:
-                with get_session_with_current_tenant() as session:
-                    pairs = get_connector_health_for_metrics(session)
-                    error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)
-                    docs_by_cc = get_docs_indexed_by_cc_pair(session)
-
-                    status_counts: dict[str, int] = {}
-                    error_count = 0
-
-                    for (
-                        cc_id,
-                        status,
-                        in_error,
-                        last_success,
-                        cc_name,
-                        source,
-                    ) in pairs:
-                        cc_id_str = str(cc_id)
-                        source_val = source.value
-                        name_val = cc_name or f"cc_pair_{cc_id}"
-                        label_vals = [tid, source_val, cc_id_str, name_val]
-
-                        if last_success is not None:
-                            # Both `now` and `last_success` are timezone-aware
-                            # (the DB column uses DateTime(timezone=True)),
-                            # so subtraction is safe.
-                            age = (now - last_success).total_seconds()
-                            staleness_gauge.add_metric(label_vals, age)
-
-                        error_state_gauge.add_metric(
-                            label_vals,
-                            1.0 if in_error else 0.0,
-                        )
-                        if in_error:
-                            error_count += 1
-
-                        docs_success_gauge.add_metric(
-                            label_vals,
-                            docs_by_cc.get(cc_id, 0),
-                        )
-
-                        docs_error_gauge.add_metric(
-                            label_vals,
-                            error_counts_by_cc.get(cc_id, 0),
-                        )
-
-                        status_val = status.value
-                        status_counts[status_val] = status_counts.get(status_val, 0) + 1
-
-                    for status_val, count in status_counts.items():
-                        by_status_gauge.add_metric([tid, status_val], count)
-
-                    error_total_gauge.add_metric([tid], error_count)
-            finally:
-                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
-
-        return [
-            staleness_gauge,
-            error_state_gauge,
-            by_status_gauge,
-            error_total_gauge,
-            docs_success_gauge,
-            docs_error_gauge,
-        ]
-
-
 class RedisHealthCollector(_CachedCollector):
    """Collects Redis server health metrics (memory, clients, etc.)."""

@@ -411,8 +247,6 @@ class RedisHealthCollector(_CachedCollector):
        if self._celery_app is None:
            return []

-        from onyx.background.celery.celery_redis import celery_get_broker_client
-
        redis_client = celery_get_broker_client(self._celery_app)

        memory_used = GaugeMetricFamily(
@@ -495,7 +329,9 @@ class WorkerHeartbeatMonitor:
                        },
                    )
                    recv.capture(
-                        limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True
+                        limit=None,
+                        timeout=self._HEARTBEAT_TIMEOUT_SECONDS,
+                        wakeup=True,
                    )
            except Exception:
                if self._running:
@@ -553,6 +389,15 @@ class WorkerHealthCollector(_CachedCollector):

    Reads worker status from ``WorkerHeartbeatMonitor`` which listens
    to the Celery event stream via a single persistent connection.
+
+    TODO: every monitoring pod subscribes to the cluster-wide Celery event
+    stream, so each replica reports health for *all* workers in the cluster,
+    not just itself. Prometheus distinguishes the replicas via the ``instance``
+    label, so this doesn't break scraping, but it means N monitoring replicas
+    do N× the work and may emit slightly inconsistent snapshots of the same
+    cluster. The proper fix is to have each worker expose its own health (or
+    to elect a single monitoring replica as the reporter) rather than
+    broadcasting the full cluster view from every monitoring pod.
    """

    def __init__(self, cache_ttl: float = 30.0) -> None:
@@ -571,10 +416,16 @@ class WorkerHealthCollector(_CachedCollector):
            "onyx_celery_active_worker_count",
            "Number of active Celery workers with recent heartbeats",
        )
+        # Celery hostnames are ``{worker_type}@{nodename}`` (see supervisord.conf).
+        # Emitting only the worker_type as a label causes N replicas of the same
+        # type to collapse into identical timeseries within a single scrape,
+        # which Prometheus rejects as "duplicate sample for timestamp". Split
+        # the pieces into separate labels so each replica is distinct; callers
+        # can still ``sum by (worker_type)`` to recover the old aggregated view.
        worker_up = GaugeMetricFamily(
            "onyx_celery_worker_up",
            "Whether a specific Celery worker is alive (1=up, 0=down)",
-            labels=["worker"],
+            labels=["worker_type", "hostname"],
        )

        try:
@@ -582,11 +433,15 @@ class WorkerHealthCollector(_CachedCollector):
            alive_count = sum(1 for alive in status.values() if alive)
            active_workers.add_metric([], alive_count)

-            for hostname in sorted(status):
-                # Use short name (before @) for single-host deployments,
-                # full hostname when multiple hosts share a worker type.
-                label = hostname.split("@")[0]
-                worker_up.add_metric([label], 1 if status[hostname] else 0)
+            for full_hostname in sorted(status):
+                worker_type, sep, host = full_hostname.partition("@")
+                if not sep:
+                    # Hostname didn't contain "@" — fall back to using the
+                    # whole string as the hostname with an empty type.
+                    worker_type, host = "", full_hostname
+                worker_up.add_metric(
+                    [worker_type, host], 1 if status[full_hostname] else 0
+                )
        except Exception:
            logger.debug("Failed to collect worker health metrics", exc_info=True)

--- a/backend/onyx/server/metrics/indexing_pipeline_setup.py
+++ b/backend/onyx/server/metrics/indexing_pipeline_setup.py
@@ -6,8 +6,6 @@ Called once by the monitoring celery worker after Redis and DB are ready.
 from celery import Celery
 from prometheus_client.registry import REGISTRY

-from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
-from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
 from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
 from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
 from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
@@ -21,8 +19,6 @@ logger = setup_logger()
 # module level ensures they survive the lifetime of the worker process and are
 # only registered with the Prometheus registry once.
 _queue_collector = QueueDepthCollector()
-_attempt_collector = IndexAttemptCollector()
-_connector_collector = ConnectorHealthCollector()
 _redis_health_collector = RedisHealthCollector()
 _worker_health_collector = WorkerHealthCollector()
 _heartbeat_monitor: WorkerHeartbeatMonitor | None = None
@@ -34,6 +30,9 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
    Args:
        celery_app: The Celery application instance. Used to obtain a
            broker Redis client on each scrape for queue depth metrics.
+
+    Note: connector health and index attempt metrics are push-based
+    (see connector_health_metrics.py) and do not use collectors.
    """
    _queue_collector.set_celery_app(celery_app)
    _redis_health_collector.set_celery_app(celery_app)
@@ -47,13 +46,8 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
        _heartbeat_monitor.start()
    _worker_health_collector.set_monitor(_heartbeat_monitor)

-    _attempt_collector.configure()
-    _connector_collector.configure()
-
    for collector in (
        _queue_collector,
-        _attempt_collector,
-        _connector_collector,
        _redis_health_collector,
        _worker_health_collector,
    ):
--- a/backend/onyx/server/metrics/metrics_server.py
+++ b/backend/onyx/server/metrics/metrics_server.py
@@ -27,6 +27,8 @@ _DEFAULT_PORTS: dict[str, int] = {
    "docfetching": 9092,
    "docprocessing": 9093,
    "heavy": 9094,
+    "light": 9095,
+    "primary": 9097,
 }

 _server_started = False
--- a/backend/onyx/server/metrics/pruning_metrics.py
+++ b/backend/onyx/server/metrics/pruning_metrics.py
@@ -28,14 +28,14 @@ PRUNING_ENUMERATION_DURATION = Histogram(
    "onyx_pruning_enumeration_duration_seconds",
    "Duration of document ID enumeration from the source connector during pruning",
    ["connector_type"],
-    buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
+    buckets=[5, 60, 600, 1800, 3600, 10800, 21600],
 )

 PRUNING_DIFF_DURATION = Histogram(
    "onyx_pruning_diff_duration_seconds",
    "Duration of diff computation and subtask dispatch during pruning",
    ["connector_type"],
-    buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
+    buckets=[0.1, 0.25, 0.5, 1, 2, 5, 15, 30, 60],
 )

 PRUNING_RATE_LIMIT_ERRORS = Counter(
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -214,7 +214,9 @@ distro==1.9.0
 dnspython==2.8.0
    # via email-validator
 docstring-parser==0.17.0
-    # via cyclopts
+    # via
+    #   cyclopts
+    #   google-cloud-aiplatform
 docutils==0.22.3
    # via rich-rst
 dropbox==12.0.2
@@ -270,7 +272,13 @@ gitdb==4.0.12
 gitpython==3.1.45
    # via braintrust
 google-api-core==2.28.1
-    # via google-api-python-client
+    # via
+    #   google-api-python-client
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
 google-api-python-client==2.86.0
 google-auth==2.48.0
    # via
@@ -278,21 +286,61 @@ google-auth==2.48.0
    #   google-api-python-client
    #   google-auth-httplib2
    #   google-auth-oauthlib
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
    #   google-genai
    #   kubernetes
 google-auth-httplib2==0.1.0
    # via google-api-python-client
 google-auth-oauthlib==1.0.0
+google-cloud-aiplatform==1.133.0
+    # via litellm
+google-cloud-bigquery==3.41.0
+    # via google-cloud-aiplatform
+google-cloud-core==2.5.1
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+google-cloud-resource-manager==1.17.0
+    # via google-cloud-aiplatform
+google-cloud-storage==3.10.1
+    # via google-cloud-aiplatform
+google-crc32c==1.8.0
+    # via
+    #   google-cloud-storage
+    #   google-resumable-media
 google-genai==1.52.0
-    # via onyx
+    # via
+    #   google-cloud-aiplatform
+    #   onyx
+google-resumable-media==2.8.2
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
 googleapis-common-protos==1.72.0
    # via
    #   google-api-core
+    #   grpc-google-iam-v1
+    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-http
 greenlet==3.2.4
    # via
    #   playwright
    #   sqlalchemy
+grpc-google-iam-v1==0.14.4
+    # via google-cloud-resource-manager
+grpcio==1.80.0
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+grpcio-status==1.80.0
+    # via google-api-core
 h11==0.16.0
    # via
    #   httpcore
@@ -562,6 +610,8 @@ packaging==24.2
    #   dask
    #   distributed
    #   fastmcp
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
    #   huggingface-hub
    #   jira
    #   kombu
@@ -608,12 +658,19 @@ propcache==0.4.1
    #   aiohttp
    #   yarl
 proto-plus==1.26.1
-    # via google-api-core
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
 protobuf==6.33.5
    # via
    #   ddtrace
    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
    #   onnxruntime
    #   opentelemetry-proto
    #   proto-plus
@@ -646,6 +703,7 @@ pydantic==2.11.7
    #   exa-py
    #   fastapi
    #   fastmcp
+    #   google-cloud-aiplatform
    #   google-genai
    #   langchain-core
    #   langfuse
@@ -702,6 +760,7 @@ python-dateutil==2.8.2
    #   botocore
    #   celery
    #   dateparser
+    #   google-cloud-bigquery
    #   htmldate
    #   hubspot-api-client
    #   kubernetes
@@ -780,6 +839,8 @@ requests==2.33.0
    #   dropbox
    #   exa-py
    #   google-api-core
+    #   google-cloud-bigquery
+    #   google-cloud-storage
    #   google-genai
    #   hubspot-api-client
    #   huggingface-hub
@@ -951,7 +1012,9 @@ typing-extensions==4.15.0
    #   exa-py
    #   exceptiongroup
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   jira
    #   langchain-core
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -113,6 +113,8 @@ distlib==0.4.0
    # via virtualenv
 distro==1.9.0
    # via openai
+docstring-parser==0.17.0
+    # via google-cloud-aiplatform
 durationpy==0.10
    # via kubernetes
 execnet==2.1.2
@@ -140,14 +142,65 @@ frozenlist==1.8.0
    #   aiosignal
 fsspec==2025.10.0
    # via huggingface-hub
+google-api-core==2.28.1
+    # via
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
 google-auth==2.48.0
    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
    #   google-genai
    #   kubernetes
+google-cloud-aiplatform==1.133.0
+    # via litellm
+google-cloud-bigquery==3.41.0
+    # via google-cloud-aiplatform
+google-cloud-core==2.5.1
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+google-cloud-resource-manager==1.17.0
+    # via google-cloud-aiplatform
+google-cloud-storage==3.10.1
+    # via google-cloud-aiplatform
+google-crc32c==1.8.0
+    # via
+    #   google-cloud-storage
+    #   google-resumable-media
 google-genai==1.52.0
-    # via onyx
+    # via
+    #   google-cloud-aiplatform
+    #   onyx
+google-resumable-media==2.8.2
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+googleapis-common-protos==1.72.0
+    # via
+    #   google-api-core
+    #   grpc-google-iam-v1
+    #   grpcio-status
 greenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
    # via sqlalchemy
+grpc-google-iam-v1==0.14.4
+    # via google-cloud-resource-manager
+grpcio==1.80.0
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+grpcio-status==1.80.0
+    # via google-api-core
 h11==0.16.0
    # via
    #   httpcore
@@ -264,6 +317,8 @@ openapi-generator-cli==7.17.0
 packaging==24.2
    # via
    #   black
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
    #   hatchling
    #   huggingface-hub
    #   ipykernel
@@ -304,6 +359,20 @@ propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
+proto-plus==1.26.1
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+protobuf==6.33.5
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   proto-plus
 psutil==7.1.3
    # via ipykernel
 ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
@@ -325,6 +394,7 @@ pydantic==2.11.7
    #   agent-client-protocol
    #   cohere
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
    #   litellm
    #   mcp
@@ -359,6 +429,7 @@ python-dateutil==2.8.2
    # via
    #   aiobotocore
    #   botocore
+    #   google-cloud-bigquery
    #   jupyter-client
    #   kubernetes
    #   matplotlib
@@ -391,6 +462,9 @@ reorder-python-imports-black==3.14.0
 requests==2.33.0
    # via
    #   cohere
+    #   google-api-core
+    #   google-cloud-bigquery
+    #   google-cloud-storage
    #   google-genai
    #   huggingface-hub
    #   kubernetes
@@ -485,7 +559,9 @@ typing-extensions==4.15.0
    #   celery-types
    #   cohere
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   ipython
    #   mcp
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -86,6 +86,8 @@ discord-py==2.4.0
    # via onyx
 distro==1.9.0
    # via openai
+docstring-parser==0.17.0
+    # via google-cloud-aiplatform
 durationpy==0.10
    # via kubernetes
 fastapi==0.133.1
@@ -102,12 +104,63 @@ frozenlist==1.8.0
    #   aiosignal
 fsspec==2025.10.0
    # via huggingface-hub
+google-api-core==2.28.1
+    # via
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
 google-auth==2.48.0
    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
    #   google-genai
    #   kubernetes
+google-cloud-aiplatform==1.133.0
+    # via litellm
+google-cloud-bigquery==3.41.0
+    # via google-cloud-aiplatform
+google-cloud-core==2.5.1
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+google-cloud-resource-manager==1.17.0
+    # via google-cloud-aiplatform
+google-cloud-storage==3.10.1
+    # via google-cloud-aiplatform
+google-crc32c==1.8.0
+    # via
+    #   google-cloud-storage
+    #   google-resumable-media
 google-genai==1.52.0
-    # via onyx
+    # via
+    #   google-cloud-aiplatform
+    #   onyx
+google-resumable-media==2.8.2
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+googleapis-common-protos==1.72.0
+    # via
+    #   google-api-core
+    #   grpc-google-iam-v1
+    #   grpcio-status
+grpc-google-iam-v1==0.14.4
+    # via google-cloud-resource-manager
+grpcio==1.80.0
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+grpcio-status==1.80.0
+    # via google-api-core
 h11==0.16.0
    # via
    #   httpcore
@@ -178,7 +231,10 @@ openai==2.14.0
    #   litellm
    #   onyx
 packaging==24.2
-    # via huggingface-hub
+    # via
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   huggingface-hub
 parameterized==0.9.0
    # via cohere
 posthog==3.7.4
@@ -192,6 +248,20 @@ propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
+proto-plus==1.26.1
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+protobuf==6.33.5
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   proto-plus
 py==1.11.0
    # via retry
 pyasn1==0.6.3
@@ -207,6 +277,7 @@ pydantic==2.11.7
    #   agent-client-protocol
    #   cohere
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
    #   litellm
    #   mcp
@@ -223,6 +294,7 @@ python-dateutil==2.8.2
    # via
    #   aiobotocore
    #   botocore
+    #   google-cloud-bigquery
    #   kubernetes
    #   posthog
 python-dotenv==1.1.1
@@ -246,6 +318,9 @@ regex==2025.11.3
 requests==2.33.0
    # via
    #   cohere
+    #   google-api-core
+    #   google-cloud-bigquery
+    #   google-cloud-storage
    #   google-genai
    #   huggingface-hub
    #   kubernetes
@@ -305,7 +380,9 @@ typing-extensions==4.15.0
    #   anyio
    #   cohere
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   mcp
    #   openai
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -101,6 +101,8 @@ discord-py==2.4.0
    # via onyx
 distro==1.9.0
    # via openai
+docstring-parser==0.17.0
+    # via google-cloud-aiplatform
 durationpy==0.10
    # via kubernetes
 einops==0.8.1
@@ -125,12 +127,63 @@ fsspec==2025.10.0
    # via
    #   huggingface-hub
    #   torch
+google-api-core==2.28.1
+    # via
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
 google-auth==2.48.0
    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
+    #   google-cloud-core
+    #   google-cloud-resource-manager
+    #   google-cloud-storage
    #   google-genai
    #   kubernetes
+google-cloud-aiplatform==1.133.0
+    # via litellm
+google-cloud-bigquery==3.41.0
+    # via google-cloud-aiplatform
+google-cloud-core==2.5.1
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+google-cloud-resource-manager==1.17.0
+    # via google-cloud-aiplatform
+google-cloud-storage==3.10.1
+    # via google-cloud-aiplatform
+google-crc32c==1.8.0
+    # via
+    #   google-cloud-storage
+    #   google-resumable-media
 google-genai==1.52.0
-    # via onyx
+    # via
+    #   google-cloud-aiplatform
+    #   onyx
+google-resumable-media==2.8.2
+    # via
+    #   google-cloud-bigquery
+    #   google-cloud-storage
+googleapis-common-protos==1.72.0
+    # via
+    #   google-api-core
+    #   grpc-google-iam-v1
+    #   grpcio-status
+grpc-google-iam-v1==0.14.4
+    # via google-cloud-resource-manager
+grpcio==1.80.0
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+grpcio-status==1.80.0
+    # via google-api-core
 h11==0.16.0
    # via
    #   httpcore
@@ -259,6 +312,8 @@ openai==2.14.0
 packaging==24.2
    # via
    #   accelerate
+    #   google-cloud-aiplatform
+    #   google-cloud-bigquery
    #   huggingface-hub
    #   kombu
    #   transformers
@@ -278,6 +333,20 @@ propcache==0.4.1
    # via
    #   aiohttp
    #   yarl
+proto-plus==1.26.1
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+protobuf==6.33.5
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   proto-plus
 psutil==7.1.3
    # via accelerate
 py==1.11.0
@@ -295,6 +364,7 @@ pydantic==2.11.7
    #   agent-client-protocol
    #   cohere
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
    #   litellm
    #   mcp
@@ -312,6 +382,7 @@ python-dateutil==2.8.2
    #   aiobotocore
    #   botocore
    #   celery
+    #   google-cloud-bigquery
    #   kubernetes
 python-dotenv==1.1.1
    # via
@@ -338,6 +409,9 @@ regex==2025.11.3
 requests==2.33.0
    # via
    #   cohere
+    #   google-api-core
+    #   google-cloud-bigquery
+    #   google-cloud-storage
    #   google-genai
    #   huggingface-hub
    #   kubernetes
@@ -425,7 +499,9 @@ typing-extensions==4.15.0
    #   anyio
    #   cohere
    #   fastapi
+    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   mcp
    #   openai
--- a/backend/tests/daily/connectors/gong/test_gong.py
+++ b/backend/tests/daily/connectors/gong/test_gong.py
@@ -7,7 +7,6 @@ import pytest

 from onyx.connectors.gong.connector import GongConnector
 from onyx.connectors.models import Document
-from onyx.connectors.models import HierarchyNode


@pytest.fixture
@@ -32,18 +31,20 @@ def test_gong_basic(
    mock_get_api_key: MagicMock,  # noqa: ARG001
    gong_connector: GongConnector,
 ) -> None:
-    doc_batch_generator = gong_connector.poll_source(0, time.time())
-
-    doc_batch = next(doc_batch_generator)
-    with pytest.raises(StopIteration):
-        next(doc_batch_generator)
-
-    assert len(doc_batch) == 2
+    checkpoint = gong_connector.build_dummy_checkpoint()

    docs: list[Document] = []
-    for doc in doc_batch:
-        if not isinstance(doc, HierarchyNode):
-            docs.append(doc)
+    while checkpoint.has_more:
+        generator = gong_connector.load_from_checkpoint(0, time.time(), checkpoint)
+        try:
+            while True:
+                item = next(generator)
+                if isinstance(item, Document):
+                    docs.append(item)
+        except StopIteration as e:
+            checkpoint = e.value
+
+    assert len(docs) == 2

    assert docs[0].semantic_identifier == "test with chris"
    assert docs[1].semantic_identifier == "Testing Gong"
--- a/backend/tests/unit/onyx/connectors/cross_connector_utils/test_miscellaneous_utils.py
+++ b/backend/tests/unit/onyx/connectors/cross_connector_utils/test_miscellaneous_utils.py
@@ -0,0 +1,53 @@
+import datetime
+
+import pytest
+
+from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
+
+
+def test_time_str_to_utc() -> None:
+    str_to_dt = {
+        "Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
+            2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
+        ),
+        "Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
+            2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
+        ),
+        "Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
+            2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
+        ),
+        "30 Jun 2023 18:45:01 +0300": datetime.datetime(
+            2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
+        ),
+        "22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
+            2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
+        ),
+        "Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
+            2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
+        ),
+    }
+    for strptime, expected_datetime in str_to_dt.items():
+        assert time_str_to_utc(strptime) == expected_datetime
+
+
+def test_time_str_to_utc_recovers_from_concatenated_headers() -> None:
+    # The offset is dropped during recovery, so the wall-clock time is kept
+    # unchanged and labelled UTC (14:33:28 -0200 -> 14:33:28 UTC, not 16:33:28).
+    assert time_str_to_utc(
+        'Sat, 3 Nov 2007 14:33:28 -0200To: "jason" <jason@example.net>'
+    ) == datetime.datetime(2007, 11, 3, 14, 33, 28, tzinfo=datetime.timezone.utc)
+
+    assert time_str_to_utc(
+        "Fri, 20 Feb 2015 10:30:00 +0500Cc: someone@example.com"
+    ) == datetime.datetime(2015, 2, 20, 10, 30, 0, tzinfo=datetime.timezone.utc)
+
+
+def test_time_str_to_utc_raises_on_impossible_dates() -> None:
+    for bad in (
+        "Wed, 33 Sep 2007 13:42:59 +0100",
+        "Thu, 11 Oct 2007 31:50:55 +0900",
+        "not a date at all",
+        "",
+    ):
+        with pytest.raises(ValueError):
+            time_str_to_utc(bad)
--- a/backend/tests/unit/onyx/connectors/gmail/test_connector.py
+++ b/backend/tests/unit/onyx/connectors/gmail/test_connector.py
@@ -1,3 +1,4 @@
+import copy
 import datetime
 import json
 import os
@@ -8,7 +9,6 @@ from unittest.mock import patch

 from onyx.access.models import ExternalAccess
 from onyx.configs.constants import DocumentSource
-from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
 from onyx.connectors.gmail.connector import _build_time_range_query
 from onyx.connectors.gmail.connector import GmailCheckpoint
 from onyx.connectors.gmail.connector import GmailConnector
@@ -51,29 +51,43 @@ def test_build_time_range_query() -> None:
    assert query is None


-def test_time_str_to_utc() -> None:
-    str_to_dt = {
-        "Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
-            2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
-        ),
-        "Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
-            2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
-        ),
-        "Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
-            2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
-        ),
-        "30 Jun 2023 18:45:01 +0300": datetime.datetime(
-            2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
-        ),
-        "22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
-            2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
-        ),
-        "Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
-            2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
-        ),
-    }
-    for strptime, expected_datetime in str_to_dt.items():
-        assert time_str_to_utc(strptime) == expected_datetime
+def _thread_with_date(date_header: str | None) -> dict[str, Any]:
+    """Load the fixture thread and replace (or strip, if None) its Date header."""
+    json_path = os.path.join(os.path.dirname(__file__), "thread.json")
+    with open(json_path, "r") as f:
+        thread = cast(dict[str, Any], json.load(f))
+    thread = copy.deepcopy(thread)
+
+    for message in thread["messages"]:
+        headers: list[dict[str, str]] = message["payload"]["headers"]
+        if date_header is None:
+            message["payload"]["headers"] = [
+                h for h in headers if h.get("name") != "Date"
+            ]
+            continue
+
+        replaced = False
+        for header in headers:
+            if header.get("name") == "Date":
+                header["value"] = date_header
+                replaced = True
+                break
+        if not replaced:
+            headers.append({"name": "Date", "value": date_header})
+
+    return thread
+
+
+def test_thread_to_document_skips_unparseable_dates() -> None:
+    for bad_date in (
+        "Wed, 33 Sep 2007 13:42:59 +0100",
+        "Thu, 11 Oct 2007 31:50:55 +0900",
+        "total garbage not even close to a date",
+    ):
+        doc = thread_to_document(_thread_with_date(bad_date), "admin@example.com")
+        assert isinstance(doc, Document), f"failed for {bad_date!r}"
+        assert doc.doc_updated_at is None
+        assert doc.id == "192edefb315737c3"


 def test_gmail_checkpoint_progression() -> None:
--- a/backend/tests/unit/onyx/connectors/gong/__init__.py
+++ b/backend/tests/unit/onyx/connectors/gong/__init__.py
--- a/backend/tests/unit/onyx/connectors/gong/test_gong_checkpoint.py
+++ b/backend/tests/unit/onyx/connectors/gong/test_gong_checkpoint.py
@@ -0,0 +1,483 @@
+import time
+from typing import Any
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+
+from onyx.connectors.gong.connector import GongConnector
+from onyx.connectors.gong.connector import GongConnectorCheckpoint
+from onyx.connectors.models import ConnectorFailure
+from onyx.connectors.models import Document
+
+
+def _make_transcript(call_id: str) -> dict[str, Any]:
+    return {
+        "callId": call_id,
+        "transcript": [
+            {
+                "speakerId": "speaker1",
+                "sentences": [{"text": "Hello world"}],
+            }
+        ],
+    }
+
+
+def _make_call_detail(call_id: str, title: str) -> dict[str, Any]:
+    return {
+        "metaData": {
+            "id": call_id,
+            "started": "2026-01-15T10:00:00Z",
+            "title": title,
+            "purpose": "Test call",
+            "url": f"https://app.gong.io/call?id={call_id}",
+            "system": "test-system",
+        },
+        "parties": [
+            {
+                "speakerId": "speaker1",
+                "name": "Alice",
+                "emailAddress": "alice@test.com",
+            }
+        ],
+    }
+
+
+@pytest.fixture
+def connector() -> GongConnector:
+    connector = GongConnector()
+    connector.load_credentials(
+        {
+            "gong_access_key": "test-key",
+            "gong_access_key_secret": "test-secret",
+        }
+    )
+    return connector
+
+
+class TestGongConnectorCheckpoint:
+    def test_build_dummy_checkpoint(self, connector: GongConnector) -> None:
+        checkpoint = connector.build_dummy_checkpoint()
+        assert checkpoint.has_more is True
+        assert checkpoint.workspace_ids is None
+        assert checkpoint.workspace_index == 0
+        assert checkpoint.cursor is None
+
+    def test_validate_checkpoint_json(self, connector: GongConnector) -> None:
+        original = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=["ws1", None],
+            workspace_index=1,
+            cursor="abc123",
+        )
+        json_str = original.model_dump_json()
+        restored = connector.validate_checkpoint_json(json_str)
+        assert restored == original
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_first_call_resolves_workspaces(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """First checkpoint call should resolve workspaces and return without fetching."""
+        # No workspaces configured — should resolve to [None]
+        checkpoint = connector.build_dummy_checkpoint()
+        generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
+
+        # Should return immediately (no yields)
+        with pytest.raises(StopIteration) as exc_info:
+            next(generator)
+
+        new_checkpoint = exc_info.value.value
+        assert new_checkpoint.workspace_ids == [None]
+        assert new_checkpoint.has_more is True
+        assert new_checkpoint.workspace_index == 0
+
+        # No API calls should have been made for workspace resolution
+        # when no workspaces are configured
+        mock_request.assert_not_called()
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_single_page_no_cursor(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """Single page of transcripts with no pagination cursor."""
+        transcript_response = MagicMock()
+        transcript_response.status_code = 200
+        transcript_response.json.return_value = {
+            "callTranscripts": [_make_transcript("call1")],
+            "records": {},
+        }
+
+        details_response = MagicMock()
+        details_response.status_code = 200
+        details_response.json.return_value = {
+            "calls": [_make_call_detail("call1", "Test Call")]
+        }
+
+        mock_request.side_effect = [transcript_response, details_response]
+
+        # Start from a checkpoint that already has workspaces resolved
+        checkpoint = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=[None],
+            workspace_index=0,
+        )
+
+        docs: list[Document] = []
+        failures: list[ConnectorFailure] = []
+        generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
+        try:
+            while True:
+                item = next(generator)
+                if isinstance(item, Document):
+                    docs.append(item)
+                elif isinstance(item, ConnectorFailure):
+                    failures.append(item)
+        except StopIteration as e:
+            checkpoint = e.value
+
+        assert len(docs) == 1
+        assert docs[0].semantic_identifier == "Test Call"
+        assert len(failures) == 0
+        assert checkpoint.has_more is False
+        assert checkpoint.workspace_index == 1
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_multi_page_with_cursor(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """Paginated transcripts are consumed one page per checkpoint run.
+
+        The first run must stop after page one and store the cursor it was
+        given; the second run resumes from that cursor and finishes.
+        """
+        # Page one: a single transcript plus a cursor pointing at page two.
+        first_page = MagicMock(status_code=200)
+        first_page.json.return_value = {
+            "callTranscripts": [_make_transcript("call1")],
+            "records": {"cursor": "page2cursor"},
+        }
+        first_details = MagicMock(status_code=200)
+        first_details.json.return_value = {
+            "calls": [_make_call_detail("call1", "Call One")]
+        }
+
+        # Page two: a single transcript and no cursor, i.e. the last page.
+        second_page = MagicMock(status_code=200)
+        second_page.json.return_value = {
+            "callTranscripts": [_make_transcript("call2")],
+            "records": {},
+        }
+        second_details = MagicMock(status_code=200)
+        second_details.json.return_value = {
+            "calls": [_make_call_detail("call2", "Call Two")]
+        }
+
+        # The mock hands these out one per request, in list order.
+        mock_request.side_effect = [
+            first_page,
+            first_details,
+            second_page,
+            second_details,
+        ]
+
+        state = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=[None],
+            workspace_index=0,
+        )
+        collected: list[Document] = []
+
+        # Run 1 consumes page one only.
+        run = connector.load_from_checkpoint(0, time.time(), state)
+        while True:
+            try:
+                yielded = next(run)
+            except StopIteration as stop:
+                state = stop.value
+                break
+            if isinstance(yielded, Document):
+                collected.append(yielded)
+
+        assert len(collected) == 1
+        assert state.cursor == "page2cursor"
+        assert state.has_more is True
+
+        # Run 2 resumes from the stored cursor and consumes page two.
+        run = connector.load_from_checkpoint(0, time.time(), state)
+        while True:
+            try:
+                yielded = next(run)
+            except StopIteration as stop:
+                state = stop.value
+                break
+            if isinstance(yielded, Document):
+                collected.append(yielded)
+
+        assert len(collected) == 2
+        assert collected[0].semantic_identifier == "Call One"
+        assert collected[1].semantic_identifier == "Call Two"
+        assert state.has_more is False
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_missing_call_details_yields_failure(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """Calls whose details never arrive are reported as failures.
+
+        Every details lookup is empty, so call1 must become a failure.
+        """
+        transcripts = MagicMock(status_code=200)
+        transcripts.json.return_value = {
+            "callTranscripts": [_make_transcript("call1")],
+            "records": {},
+        }
+        # Details reply that lists no calls (simulates the race condition).
+        no_details = MagicMock(status_code=200)
+        no_details.json.return_value = {"calls": []}
+
+        # One transcript page, then an empty reply for each permitted attempt.
+        attempts = GongConnector.MAX_CALL_DETAILS_ATTEMPTS
+        mock_request.side_effect = [transcripts, *([no_details] * attempts)]
+
+        state = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=[None],
+            workspace_index=0,
+        )
+        failed: list[ConnectorFailure] = []
+        indexed: list[Document] = []
+
+        with patch("onyx.connectors.gong.connector.time.sleep"):
+            run = connector.load_from_checkpoint(0, time.time(), state)
+            while True:
+                try:
+                    yielded = next(run)
+                except StopIteration as stop:
+                    state = stop.value
+                    break
+                if isinstance(yielded, ConnectorFailure):
+                    failed.append(yielded)
+                elif isinstance(yielded, Document):
+                    indexed.append(yielded)
+
+        assert len(indexed) == 0
+        assert len(failed) == 1
+        assert failed[0].failed_document is not None
+        assert failed[0].failed_document.document_id == "call1"
+        assert state.has_more is False
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_multi_workspace_iteration(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """Each checkpoint run handles one workspace, then moves to the next.
+
+        With two workspace ids, two runs are needed to exhaust the source.
+        """
+        # Replies for the first workspace: one transcript and its details.
+        ws1_page = MagicMock(status_code=200)
+        ws1_page.json.return_value = {
+            "callTranscripts": [_make_transcript("call_ws1")],
+            "records": {},
+        }
+        ws1_calls = MagicMock(status_code=200)
+        ws1_calls.json.return_value = {
+            "calls": [_make_call_detail("call_ws1", "WS1 Call")]
+        }
+
+        # Replies for the second workspace: one transcript and its details.
+        ws2_page = MagicMock(status_code=200)
+        ws2_page.json.return_value = {
+            "callTranscripts": [_make_transcript("call_ws2")],
+            "records": {},
+        }
+        ws2_calls = MagicMock(status_code=200)
+        ws2_calls.json.return_value = {
+            "calls": [_make_call_detail("call_ws2", "WS2 Call")]
+        }
+
+        mock_request.side_effect = [
+            ws1_page,
+            ws1_calls,
+            ws2_page,
+            ws2_calls,
+        ]
+
+        state = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=["ws1_id", "ws2_id"],
+            workspace_index=0,
+        )
+        collected: list[Document] = []
+
+        # Run 1 covers the first workspace.
+        run = connector.load_from_checkpoint(0, time.time(), state)
+        while True:
+            try:
+                yielded = next(run)
+            except StopIteration as stop:
+                state = stop.value
+                break
+            if isinstance(yielded, Document):
+                collected.append(yielded)
+
+        assert state.workspace_index == 1
+        assert state.has_more is True
+
+        # Run 2 covers the second workspace.
+        run = connector.load_from_checkpoint(0, time.time(), state)
+        while True:
+            try:
+                yielded = next(run)
+            except StopIteration as stop:
+                state = stop.value
+                break
+            if isinstance(yielded, Document):
+                collected.append(yielded)
+
+        assert len(collected) == 2
+        assert collected[0].semantic_identifier == "WS1 Call"
+        assert collected[1].semantic_identifier == "WS2 Call"
+        assert state.has_more is False
+        assert state.workspace_index == 2
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_empty_workspace_404(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """A 404 on the transcript request marks the workspace as finished.
+
+        The returned checkpoint must advance past the only workspace and
+        report that nothing is left to fetch.
+        """
+        # Every request made by the connector gets the same 404 reply.
+        mock_request.return_value = MagicMock(status_code=404)
+        start_state = GongConnectorCheckpoint(
+            has_more=True, workspace_ids=[None], workspace_index=0
+        )
+
+        run = connector.load_from_checkpoint(0, time.time(), start_state)
+        while True:
+            try:
+                next(run)
+            except StopIteration as stop:
+                final_state = stop.value
+                break
+
+        assert final_state.has_more is False
+        assert final_state.workspace_index == 1
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_retry_only_fetches_missing_ids(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """A details retry asks only for the call ids still outstanding.
+
+        The first details reply covers call1 only, so the follow-up request
+        must name call2 alone rather than repeating the whole batch.
+        """
+        transcripts = MagicMock(status_code=200)
+        transcripts.json.return_value = {
+            "callTranscripts": [
+                _make_transcript("call1"),
+                _make_transcript("call2"),
+            ],
+            "records": {},
+        }
+
+        # Initial details reply: call1 is present, call2 is not.
+        first_details = MagicMock(status_code=200)
+        first_details.json.return_value = {
+            "calls": [_make_call_detail("call1", "Call One")]
+        }
+
+        # Reply to the retry: supplies the previously missing call2.
+        retry_details = MagicMock(status_code=200)
+        retry_details.json.return_value = {
+            "calls": [_make_call_detail("call2", "Call Two")]
+        }
+
+        mock_request.side_effect = [
+            transcripts,
+            first_details,
+            retry_details,
+        ]
+
+        state = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=[None],
+            workspace_index=0,
+        )
+        indexed: list[Document] = []
+        with patch("onyx.connectors.gong.connector.time.sleep"):
+            run = connector.load_from_checkpoint(0, time.time(), state)
+            while True:
+                try:
+                    yielded = next(run)
+                except StopIteration:
+                    break
+                if isinstance(yielded, Document):
+                    indexed.append(yielded)
+
+        assert len(indexed) == 2
+        assert indexed[0].semantic_identifier == "Call One"
+        assert indexed[1].semantic_identifier == "Call Two"
+
+        # Three requests in total: transcripts, full details, retry for the gap.
+        assert mock_request.call_count == 3
+        # The third request's JSON body must list call2 and nothing else.
+        retry_body = mock_request.call_args_list[2].kwargs["json"]
+        assert retry_body["filter"]["callIds"] == ["call2"]
+
+    @patch.object(GongConnector, "_throttled_request")
+    def test_expired_cursor_restarts_workspace(
+        self,
+        mock_request: MagicMock,
+        connector: GongConnector,
+    ) -> None:
+        """An expired cursor makes the connector start the workspace over.
+
+        Nothing is indexed and the returned checkpoint carries no cursor.
+        """
+        rejected = MagicMock(status_code=400, ok=False)
+        rejected.text = '{"requestId":"abc","errors":["cursor has expired"]}'
+        mock_request.return_value = rejected
+
+        # The saved checkpoint is mid-pagination and carries the stale cursor.
+        state = GongConnectorCheckpoint(
+            has_more=True,
+            workspace_ids=[None],
+            workspace_index=0,
+            cursor="stale-cursor",
+        )
+        indexed: list[Document] = []
+        run = connector.load_from_checkpoint(0, time.time(), state)
+        while True:
+            try:
+                yielded = next(run)
+            except StopIteration as stop:
+                state = stop.value
+                break
+            if isinstance(yielded, Document):
+                indexed.append(yielded)
+
+        assert len(indexed) == 0
+        # With the cursor cleared, the next run begins the workspace afresh.
+        assert state.cursor is None
+        assert state.workspace_index == 0
+        assert state.has_more is True
--- a/backend/tests/unit/onyx/file_processing/test_pdf.py
+++ b/backend/tests/unit/onyx/file_processing/test_pdf.py
@@ -12,6 +12,10 @@ dependency on pypdf internals (pypdf.generic).
 from io import BytesIO
 from pathlib import Path

+import pytest
+
+from onyx.file_processing import extract_file_text
+from onyx.file_processing.extract_file_text import count_pdf_embedded_images
 from onyx.file_processing.extract_file_text import pdf_to_text
 from onyx.file_processing.extract_file_text import read_pdf_file
 from onyx.file_processing.password_validation import is_pdf_protected
@@ -96,6 +100,80 @@ class TestReadPdfFile:
        # Returned list is empty when callback is used
        assert images == []

+    def test_image_cap_skips_images_above_limit(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """Images beyond the embedded-image cap are not extracted.
+
+        The cap keeps the user-file-processing worker from running out of
+        memory on PDFs with thousands of embedded images. The fixture has one
+        image, so a cap of 0 must leave the extracted list empty.
+        """
+        monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
+        images = read_pdf_file(_load("with_image.pdf"), extract_images=True)[2]
+        assert images == []
+
+    def test_image_cap_at_limit_extracts_up_to_cap(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """With a cap of 100 the fixture's single image is still extracted."""
+        monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 100)
+        images = read_pdf_file(_load("with_image.pdf"), extract_images=True)[2]
+        assert len(images) == 1
+
+    def test_image_cap_with_callback_stops_streaming_at_limit(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """With a cap of 0 the image callback never receives anything."""
+        monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
+        streamed: list[tuple[bytes, str]] = []
+
+        def callback(data: bytes, name: str) -> None:
+            # Remember every image handed to the callback.
+            streamed.append((data, name))
+
+        pdf = _load("with_image.pdf")
+        read_pdf_file(pdf, extract_images=True, image_callback=callback)
+        assert streamed == []
+
+
+# ── count_pdf_embedded_images ────────────────────────────────────────────
+
+
+class TestCountPdfEmbeddedImages:
+    def test_returns_count_for_normal_pdf(self) -> None:
+        assert count_pdf_embedded_images(_load("with_image.pdf"), cap=10) == 1
+
+    def test_short_circuits_above_cap(self) -> None:
+        """With cap=0 the single image in with_image.pdf already exceeds the
+        cap, so the count stops at 1, which doubles as the over-cap sentinel."""
+        assert count_pdf_embedded_images(_load("with_image.pdf"), cap=0) == 1
+
+    def test_returns_zero_for_pdf_without_images(self) -> None:
+        assert count_pdf_embedded_images(_load("simple.pdf"), cap=10) == 0
+
+    def test_returns_zero_for_invalid_pdf(self) -> None:
+        assert count_pdf_embedded_images(BytesIO(b"not a pdf"), cap=10) == 0
+
+    def test_returns_zero_for_password_locked_pdf(self) -> None:
+        """encrypted.pdf needs an open password, so it cannot be inspected and
+        the helper reports 0. Callers rely on the password-protected check
+        that runs earlier in the upload pipeline."""
+        assert count_pdf_embedded_images(_load("encrypted.pdf"), cap=10) == 0
+
+    def test_inspects_owner_password_only_pdf(self) -> None:
+        """owner_protected.pdf is encrypted without an open password, so it
+        decrypts with an empty string and is counted normally. The fixture
+        holds no images, making 0 a genuine count rather than the
+        bail-out-on-encrypted result."""
+        assert count_pdf_embedded_images(_load("owner_protected.pdf"), cap=10) == 0
+
+    def test_preserves_file_position(self) -> None:
+        stream = _load("with_image.pdf")
+        stream.seek(42)
+        count_pdf_embedded_images(stream, cap=10)
+        assert stream.tell() == 42
+

 # ── pdf_to_text ──────────────────────────────────────────────────────────

--- a/backend/tests/unit/server/metrics/test_celery_task_metrics.py
+++ b/backend/tests/unit/server/metrics/test_celery_task_metrics.py
@@ -1,15 +1,18 @@
 """Tests for generic Celery task lifecycle Prometheus metrics."""

+import time
 from collections.abc import Iterator
 from unittest.mock import MagicMock

 import pytest

+from onyx.background.celery.apps.app_base import on_before_task_publish
 from onyx.server.metrics.celery_task_metrics import _task_start_times
 from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
 from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
 from onyx.server.metrics.celery_task_metrics import TASK_COMPLETED
 from onyx.server.metrics.celery_task_metrics import TASK_DURATION
+from onyx.server.metrics.celery_task_metrics import TASK_QUEUE_WAIT
 from onyx.server.metrics.celery_task_metrics import TASK_STARTED
 from onyx.server.metrics.celery_task_metrics import TASKS_ACTIVE

@@ -22,11 +25,18 @@ def reset_metrics() -> Iterator[None]:
    _task_start_times.clear()


-def _make_task(name: str = "test_task", queue: str = "test_queue") -> MagicMock:
+def _make_task(
+    name: str = "test_task",
+    queue: str = "test_queue",
+    enqueued_at: float | None = None,
+) -> MagicMock:
    task = MagicMock()
    task.name = name
    task.request = MagicMock()
    task.request.delivery_info = {"routing_key": queue}
+    task.request.headers = (
+        {"enqueued_at": enqueued_at} if enqueued_at is not None else {}
+    )
    return task


@@ -72,6 +82,35 @@ class TestCeleryTaskPrerun:
        on_celery_task_prerun("task-1", task)
        assert "task-1" in _task_start_times

+    def test_observes_queue_wait_when_enqueued_at_present(self) -> None:
+        """A task stamped 30s in the past adds at least 30s of queue wait."""
+        task = _make_task(enqueued_at=time.time() - 30)
+
+        # Both reads go through the same labelled series, resolved once here.
+        series = TASK_QUEUE_WAIT.labels(task_name="test_task", queue="test_queue")
+        sum_before = series._sum.get()
+
+        on_celery_task_prerun("task-1", task)
+
+        sum_after = series._sum.get()
+        # Real time passes before the handler runs, so the observed wait is
+        # checked against a lower bound rather than an exact value.
+        assert sum_after >= sum_before + 30
+
+    def test_skips_queue_wait_when_enqueued_at_missing(self) -> None:
+        """Without an enqueued_at header the queue-wait sum stays unchanged."""
+        # _make_task() with no arguments produces empty request headers.
+        task = _make_task()
+
+        series = TASK_QUEUE_WAIT.labels(task_name="test_task", queue="test_queue")
+        sum_before = series._sum.get()
+
+        on_celery_task_prerun("task-2", task)
+
+        sum_after = series._sum.get()
+        # Nothing may be observed for a task that carries no publish stamp.
+        assert sum_after == sum_before
+

 class TestCeleryTaskPostrun:
    def test_increments_completed_success(self) -> None:
@@ -151,3 +190,15 @@ class TestCeleryTaskPostrun:
        task = _make_task()
        on_celery_task_postrun("task-1", task, "SUCCESS")
        # Should not raise
+
+
+class TestBeforeTaskPublish:
+    def test_stamps_enqueued_at_into_headers(self) -> None:
+        """The stamp lies between clock readings taken around the call."""
+        headers: dict[str, float] = {}
+        earliest = time.time()
+        on_before_task_publish(headers=headers)
+        assert earliest <= headers["enqueued_at"] <= time.time()
+
+    def test_noop_when_headers_is_none(self) -> None:
+        on_before_task_publish(headers=None)  # should not raise
--- a/backend/tests/unit/server/metrics/test_deletion_metrics.py
+++ b/backend/tests/unit/server/metrics/test_deletion_metrics.py
@@ -0,0 +1,204 @@
+"""Tests for deletion-specific Prometheus metrics."""
+
+import pytest
+
+from onyx.server.metrics.deletion_metrics import DELETION_BLOCKED
+from onyx.server.metrics.deletion_metrics import DELETION_COMPLETED
+from onyx.server.metrics.deletion_metrics import DELETION_FENCE_RESET
+from onyx.server.metrics.deletion_metrics import DELETION_STARTED
+from onyx.server.metrics.deletion_metrics import DELETION_TASKSET_DURATION
+from onyx.server.metrics.deletion_metrics import inc_deletion_blocked
+from onyx.server.metrics.deletion_metrics import inc_deletion_completed
+from onyx.server.metrics.deletion_metrics import inc_deletion_fence_reset
+from onyx.server.metrics.deletion_metrics import inc_deletion_started
+from onyx.server.metrics.deletion_metrics import observe_deletion_taskset_duration
+
+
+class TestIncDeletionStarted:
+    def test_increments_counter(self) -> None:
+        """A single call raises the t1 series by exactly one."""
+        series = DELETION_STARTED.labels(tenant_id="t1")
+        start = series._value.get()
+
+        inc_deletion_started("t1")
+        assert series._value.get() == start + 1
+
+    def test_labels_by_tenant(self) -> None:
+        """Incrementing t1 must leave the t2 series untouched."""
+        t1 = DELETION_STARTED.labels(tenant_id="t1")
+        t2 = DELETION_STARTED.labels(tenant_id="t2")
+        start_t1, start_t2 = t1._value.get(), t2._value.get()
+
+        inc_deletion_started("t1")
+        assert (t1._value.get(), t2._value.get()) == (start_t1 + 1, start_t2)
+
+    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        def explode(**_labels: str) -> None:
+            raise RuntimeError("boom")
+
+        # Any attempt to resolve a labelled series now fails.
+        monkeypatch.setattr(DELETION_STARTED, "labels", explode)
+        inc_deletion_started("t1")
+
+
+class TestIncDeletionCompleted:
+    """Checks for the inc_deletion_completed helper."""
+
+    def test_increments_counter(self) -> None:
+        """A single call raises the (t1, success) series by exactly one."""
+        # Both reads go through the same labelled series, resolved once here.
+        series = DELETION_COMPLETED.labels(tenant_id="t1", outcome="success")
+        start = series._value.get()
+
+        inc_deletion_completed("t1", "success")
+
+        assert series._value.get() == start + 1
+
+    def test_labels_by_outcome(self) -> None:
+        """A success increment must not leak into the failure series."""
+        # One series per outcome, both for the same tenant.
+        succeeded = DELETION_COMPLETED.labels(tenant_id="t1", outcome="success")
+        failed = DELETION_COMPLETED.labels(tenant_id="t1", outcome="failure")
+        start_succeeded = succeeded._value.get()
+        start_failed = failed._value.get()
+
+        inc_deletion_completed("t1", "success")
+
+        # Only the series matching the reported outcome may move.
+        assert succeeded._value.get() == start_succeeded + 1
+        assert failed._value.get() == start_failed
+
+    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The helper must swallow errors raised by the metrics client.
+
+        ``labels`` is replaced with a function that always raises; the test
+        passes as long as the helper call below returns normally.
+        """
+
+        def explode(**_labels: str) -> None:
+            raise RuntimeError("boom")
+
+        monkeypatch.setattr(DELETION_COMPLETED, "labels", explode)
+        inc_deletion_completed("t1", "success")
+
+
+class TestObserveDeletionTasksetDuration:
+    """Checks for the observe_deletion_taskset_duration helper."""
+
+    @staticmethod
+    def _current_sum(tenant_id: str, outcome: str) -> float:
+        """Read the running sum of the series with the given labels.
+
+        Tests compare this value before and after the helper call.
+        """
+        return DELETION_TASKSET_DURATION.labels(
+            tenant_id=tenant_id, outcome=outcome
+        )._sum.get()
+
+    def test_observes_duration(self) -> None:
+        """The observed 120s are added to the (t1, success) sum."""
+        base = self._current_sum("t1", "success")
+
+        observe_deletion_taskset_duration("t1", "success", 120.0)
+
+        # approx() tolerates floating-point rounding in the running sum.
+        assert self._current_sum("t1", "success") == pytest.approx(base + 120.0)
+
+    def test_labels_by_tenant(self) -> None:
+        """An observation for t1 must leave the t2 sum where it was."""
+        base_t1 = self._current_sum("t1", "success")
+        base_t2 = self._current_sum("t2", "success")
+
+        observe_deletion_taskset_duration("t1", "success", 60.0)
+
+        assert self._current_sum("t1", "success") == pytest.approx(base_t1 + 60.0)
+        assert self._current_sum("t2", "success") == pytest.approx(base_t2)
+
+    def test_labels_by_outcome(self) -> None:
+        """A failure observation must leave the success sum where it was."""
+        base_success = self._current_sum("t1", "success")
+        base_failure = self._current_sum("t1", "failure")
+
+        observe_deletion_taskset_duration("t1", "failure", 45.0)
+
+        assert self._current_sum("t1", "success") == pytest.approx(base_success)
+        assert self._current_sum("t1", "failure") == pytest.approx(base_failure + 45.0)
+
+    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The helper must swallow errors raised by the metrics client.
+
+        ``labels`` is replaced with a function that always raises; the test
+        passes as long as the helper call below returns normally.
+        """
+
+        def explode(**_labels: str) -> None:
+            raise RuntimeError("boom")
+
+        monkeypatch.setattr(DELETION_TASKSET_DURATION, "labels", explode)
+        observe_deletion_taskset_duration("t1", "success", 10.0)
+
+
+class TestIncDeletionBlocked:
+    """Checks for the inc_deletion_blocked helper."""
+
+    def test_increments_counter(self) -> None:
+        """A single call raises the (t1, indexing) series by exactly one."""
+        series = DELETION_BLOCKED.labels(tenant_id="t1", blocker="indexing")
+        start = series._value.get()
+
+        inc_deletion_blocked("t1", "indexing")
+
+        assert series._value.get() == start + 1
+
+    def test_labels_by_blocker(self) -> None:
+        """An indexing increment must not leak into the pruning series."""
+        indexing = DELETION_BLOCKED.labels(tenant_id="t1", blocker="indexing")
+        pruning = DELETION_BLOCKED.labels(tenant_id="t1", blocker="pruning")
+        start_indexing = indexing._value.get()
+        start_pruning = pruning._value.get()
+
+        inc_deletion_blocked("t1", "indexing")
+
+        # Only the series matching the reported blocker may move.
+        assert indexing._value.get() == start_indexing + 1
+        assert pruning._value.get() == start_pruning
+
+    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """The helper must swallow errors raised by the metrics client.
+
+        ``labels`` is replaced with a function that always raises; the test
+        passes as long as the helper call below returns normally.
+        """
+
+        def explode(**_labels: str) -> None:
+            raise RuntimeError("boom")
+
+        monkeypatch.setattr(DELETION_BLOCKED, "labels", explode)
+        inc_deletion_blocked("t1", "indexing")
+
+
+class TestIncDeletionFenceReset:
+    def test_increments_counter(self) -> None:
+        """A single call raises the t1 series by exactly one."""
+        series = DELETION_FENCE_RESET.labels(tenant_id="t1")
+        start = series._value.get()
+
+        inc_deletion_fence_reset("t1")
+        assert series._value.get() == start + 1
+
+    def test_labels_by_tenant(self) -> None:
+        """Incrementing t1 must leave the t2 series untouched."""
+        t1 = DELETION_FENCE_RESET.labels(tenant_id="t1")
+        t2 = DELETION_FENCE_RESET.labels(tenant_id="t2")
+        start_t1, start_t2 = t1._value.get(), t2._value.get()
+
+        inc_deletion_fence_reset("t1")
+        assert (t1._value.get(), t2._value.get()) == (start_t1 + 1, start_t2)
+
+    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        def explode(**_labels: str) -> None:
+            raise RuntimeError("boom")
+
+        # Any attempt to resolve a labelled series now fails.
+        monkeypatch.setattr(DELETION_FENCE_RESET, "labels", explode)
+        inc_deletion_fence_reset("t1")
--- a/backend/tests/unit/server/metrics/test_indexing_pipeline_collectors.py
+++ b/backend/tests/unit/server/metrics/test_indexing_pipeline_collectors.py
@@ -1,16 +1,11 @@
 """Tests for indexing pipeline Prometheus collectors."""

 from collections.abc import Iterator
-from datetime import datetime
-from datetime import timedelta
-from datetime import timezone
 from unittest.mock import MagicMock
 from unittest.mock import patch

 import pytest

-from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
-from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
 from onyx.server.metrics.indexing_pipeline import QueueDepthCollector


@@ -18,7 +13,7 @@ from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
 def _mock_broker_client() -> Iterator[None]:
    """Patch celery_get_broker_client for all collector tests."""
    with patch(
-        "onyx.background.celery.celery_redis.celery_get_broker_client",
+        "onyx.server.metrics.indexing_pipeline.celery_get_broker_client",
        return_value=MagicMock(),
    ):
        yield
@@ -137,212 +132,3 @@ class TestQueueDepthCollector:
            stale_result = collector.collect()

        assert stale_result is good_result
-
-
-class TestIndexAttemptCollector:
-    def test_returns_empty_when_not_configured(self) -> None:
-        collector = IndexAttemptCollector()
-        assert collector.collect() == []
-
-    def test_returns_empty_describe(self) -> None:
-        collector = IndexAttemptCollector()
-        assert collector.describe() == []
-
-    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
-    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
-    def test_collects_index_attempts(
-        self,
-        mock_get_session: MagicMock,
-        mock_get_tenants: MagicMock,
-    ) -> None:
-        collector = IndexAttemptCollector(cache_ttl=0)
-        collector.configure()
-
-        mock_get_tenants.return_value = ["public"]
-
-        mock_session = MagicMock()
-        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
-        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
-
-        from onyx.db.enums import IndexingStatus
-
-        mock_row = (
-            IndexingStatus.IN_PROGRESS,
-            MagicMock(value="web"),
-            81,
-            "Table Tennis Blade Guide",
-            2,
-        )
-        mock_session.query.return_value.join.return_value.join.return_value.filter.return_value.group_by.return_value.all.return_value = [
-            mock_row
-        ]
-
-        families = collector.collect()
-        assert len(families) == 1
-        assert families[0].name == "onyx_index_attempts_active"
-        assert len(families[0].samples) == 1
-        sample = families[0].samples[0]
-        assert sample.labels == {
-            "status": "in_progress",
-            "source": "web",
-            "tenant_id": "public",
-            "connector_name": "Table Tennis Blade Guide",
-            "cc_pair_id": "81",
-        }
-        assert sample.value == 2
-
-    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
-    def test_handles_db_error_gracefully(
-        self,
-        mock_get_tenants: MagicMock,
-    ) -> None:
-        collector = IndexAttemptCollector(cache_ttl=0)
-        collector.configure()
-
-        mock_get_tenants.side_effect = Exception("DB down")
-        families = collector.collect()
-        # No stale cache, so returns empty
-        assert families == []
-
-    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
-    def test_skips_none_tenant_ids(
-        self,
-        mock_get_tenants: MagicMock,
-    ) -> None:
-        collector = IndexAttemptCollector(cache_ttl=0)
-        collector.configure()
-
-        mock_get_tenants.return_value = [None]
-        families = collector.collect()
-        assert len(families) == 1  # Returns the gauge family, just with no samples
-        assert len(families[0].samples) == 0
-
-
-class TestConnectorHealthCollector:
-    def test_returns_empty_when_not_configured(self) -> None:
-        collector = ConnectorHealthCollector()
-        assert collector.collect() == []
-
-    def test_returns_empty_describe(self) -> None:
-        collector = ConnectorHealthCollector()
-        assert collector.describe() == []
-
-    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
-    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
-    def test_collects_connector_health(
-        self,
-        mock_get_session: MagicMock,
-        mock_get_tenants: MagicMock,
-    ) -> None:
-        collector = ConnectorHealthCollector(cache_ttl=0)
-        collector.configure()
-
-        mock_get_tenants.return_value = ["public"]
-
-        mock_session = MagicMock()
-        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
-        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
-
-        now = datetime.now(tz=timezone.utc)
-        last_success = now - timedelta(hours=2)
-
-        mock_status = MagicMock(value="ACTIVE")
-        mock_source = MagicMock(value="google_drive")
-        # Row: (id, status, in_error, last_success, name, source)
-        mock_row = (
-            42,
-            mock_status,
-            True,  # in_repeated_error_state
-            last_success,
-            "My GDrive Connector",
-            mock_source,
-        )
-        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
-
-        # Mock the index attempt queries (error counts + docs counts)
-        mock_session.query.return_value.filter.return_value.group_by.return_value.all.return_value = (
-            []
-        )
-
-        families = collector.collect()
-
-        assert len(families) == 6
-        names = {f.name for f in families}
-        assert names == {
-            "onyx_connector_last_success_age_seconds",
-            "onyx_connector_in_error_state",
-            "onyx_connectors_by_status",
-            "onyx_connectors_in_error_total",
-            "onyx_connector_docs_indexed",
-            "onyx_connector_error_count",
-        }
-
-        staleness = next(
-            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
-        )
-        assert len(staleness.samples) == 1
-        assert staleness.samples[0].value == pytest.approx(7200, abs=5)
-
-        error_state = next(
-            f for f in families if f.name == "onyx_connector_in_error_state"
-        )
-        assert error_state.samples[0].value == 1.0
-
-        by_status = next(f for f in families if f.name == "onyx_connectors_by_status")
-        assert by_status.samples[0].labels == {
-            "tenant_id": "public",
-            "status": "ACTIVE",
-        }
-        assert by_status.samples[0].value == 1
-
-        error_total = next(
-            f for f in families if f.name == "onyx_connectors_in_error_total"
-        )
-        assert error_total.samples[0].value == 1
-
-    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
-    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
-    def test_skips_staleness_when_no_last_success(
-        self,
-        mock_get_session: MagicMock,
-        mock_get_tenants: MagicMock,
-    ) -> None:
-        collector = ConnectorHealthCollector(cache_ttl=0)
-        collector.configure()
-
-        mock_get_tenants.return_value = ["public"]
-
-        mock_session = MagicMock()
-        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
-        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
-
-        mock_status = MagicMock(value="INITIAL_INDEXING")
-        mock_source = MagicMock(value="slack")
-        mock_row = (
-            10,
-            mock_status,
-            False,
-            None,  # no last_successful_index_time
-            0,
-            mock_source,
-        )
-        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
-
-        families = collector.collect()
-
-        staleness = next(
-            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
-        )
-        assert len(staleness.samples) == 0
-
-    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
-    def test_handles_db_error_gracefully(
-        self,
-        mock_get_tenants: MagicMock,
-    ) -> None:
-        collector = ConnectorHealthCollector(cache_ttl=0)
-        collector.configure()
-
-        mock_get_tenants.side_effect = Exception("DB down")
-        families = collector.collect()
-        assert families == []
--- a/backend/tests/unit/server/metrics/test_worker_health.py
+++ b/backend/tests/unit/server/metrics/test_worker_health.py
@@ -129,12 +129,36 @@ class TestWorkerHealthCollector:
        up = families[1]
        assert up.name == "onyx_celery_worker_up"
        assert len(up.samples) == 3
-        # Labels use short names (before @)
-        labels = {s.labels["worker"] for s in up.samples}
-        assert labels == {"primary", "docfetching", "monitoring"}
+        label_pairs = {
+            (s.labels["worker_type"], s.labels["hostname"]) for s in up.samples
+        }
+        assert label_pairs == {
+            ("primary", "host1"),
+            ("docfetching", "host1"),
+            ("monitoring", "host1"),
+        }
        for sample in up.samples:
            assert sample.value == 1

+    def test_replicas_of_same_worker_type_are_distinct(self) -> None:
+        """Regression: ``docprocessing@pod-1`` and ``docprocessing@pod-2`` must
+        produce separate samples, not collapse into one duplicate-timestamp
+        series.
+        """
+        monitor = WorkerHeartbeatMonitor(MagicMock())
+        monitor._on_heartbeat({"hostname": "docprocessing@pod-1"})
+        monitor._on_heartbeat({"hostname": "docprocessing@pod-2"})
+        monitor._on_heartbeat({"hostname": "docprocessing@pod-3"})
+
+        collector = WorkerHealthCollector(cache_ttl=0)
+        collector.set_monitor(monitor)
+
+        up = collector.collect()[1]
+        assert len(up.samples) == 3
+        hostnames = {s.labels["hostname"] for s in up.samples}
+        assert hostnames == {"pod-1", "pod-2", "pod-3"}
+        assert all(s.labels["worker_type"] == "docprocessing" for s in up.samples)
+
    def test_reports_dead_worker(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
        monitor._on_heartbeat({"hostname": "primary@host1"})
@@ -151,9 +175,9 @@ class TestWorkerHealthCollector:
        assert active.samples[0].value == 1

        up = families[1]
-        samples_by_name = {s.labels["worker"]: s.value for s in up.samples}
-        assert samples_by_name["primary"] == 1
-        assert samples_by_name["monitoring"] == 0
+        samples_by_type = {s.labels["worker_type"]: s.value for s in up.samples}
+        assert samples_by_type["primary"] == 1
+        assert samples_by_type["monitoring"] == 0

    def test_empty_monitor_returns_zero(self) -> None:
        monitor = WorkerHeartbeatMonitor(MagicMock())
--- a/docs/METRICS.md
+++ b/docs/METRICS.md
@@ -217,11 +217,23 @@ Enriches docfetching and docprocessing tasks with connector-level labels. Silent
 | `onyx_indexing_task_completed_total`  | Counter   | `task_name`, `source`, `tenant_id`, `cc_pair_id`, `outcome` | Indexing tasks completed per connector   |
 | `onyx_indexing_task_duration_seconds` | Histogram | `task_name`, `source`, `tenant_id`                          | Indexing task duration by connector type |

-`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string). The pull-based collectors on the monitoring worker include it since they have bounded cardinality (one series per connector).
+`connector_name` is intentionally excluded from these per-task counters to avoid unbounded cardinality (it's a free-form user string).
+
+### Connector Health Metrics (`onyx.server.metrics.connector_health_metrics`)
+
+Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Because no per-tenant schema iteration is needed, this approach scales to any number of tenants. Unlike the per-task counters above, these include `connector_name` because their cardinality is bounded by the number of connectors (one series per connector), not by the number of task executions.
+
+| Metric                                          | Type    | Labels                                                          | Description                                                   |
+| ----------------------------------------------- | ------- | --------------------------------------------------------------- | ------------------------------------------------------------- |
+| `onyx_index_attempt_transitions_total`          | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`, `status` | Index attempt status transitions (in_progress, success, etc.) |
+| `onyx_connector_in_error_state`                 | Gauge   | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Whether connector is in repeated error state (1=yes, 0=no)    |
+| `onyx_connector_last_success_timestamp_seconds` | Gauge   | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Unix timestamp of last successful indexing                    |
+| `onyx_connector_docs_indexed_total`             | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Total documents indexed per connector (monotonic)             |
+| `onyx_connector_indexing_errors_total`          | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`           | Total failed index attempts per connector (monotonic)         |

 ### Pull-Based Collectors (`onyx.server.metrics.indexing_pipeline`)

-Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at scrape time with a 30-second TTL cache.
+Registered only in the **Monitoring** worker. Collectors query Redis at scrape time; results are cached with a 30-second TTL, and a 120-second timeout prevents the `/metrics` endpoint from hanging.

 | Metric                               | Type  | Labels  | Description                         |
 | ------------------------------------ | ----- | ------- | ----------------------------------- |
@@ -229,8 +241,6 @@ Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at
 | `onyx_queue_unacked`                 | Gauge | `queue` | Unacknowledged messages per queue   |
 | `onyx_queue_oldest_task_age_seconds` | Gauge | `queue` | Age of the oldest task in the queue |

-Plus additional connector health, index attempt, and worker heartbeat metrics — see `indexing_pipeline.py` for the full list.
-
 ### Adding Metrics to a Worker

 Currently only the docfetching and docprocessing workers have push-based task metrics wired up. To add metrics to another worker (e.g. heavy, light, primary):
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
    "cohere==5.6.1",
    "fastapi==0.133.1",
    "google-genai==1.52.0",
-    "litellm==1.81.6",
+    "litellm[google]==1.81.6",
    "openai==2.14.0",
    "pydantic==2.11.7",
    "prometheus_client>=0.21.1",
--- a/uv.lock
+++ b/uv.lock
@@ -2115,6 +2115,12 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ed/d4/90197b416cb61cefd316964fd9e7bd8324bcbafabf40eef14a9f20b81974/google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c", size = 173706, upload-time = "2025-10-28T21:34:50.151Z" },
 ]

+[package.optional-dependencies]
+grpc = [
+    { name = "grpcio" },
+    { name = "grpcio-status" },
+]
+
 [[package]]
 name = "google-api-python-client"
 version = "2.86.0"
@@ -2172,6 +2178,124 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/4a/07/8d9a8186e6768b55dfffeb57c719bc03770cf8a970a074616ae6f9e26a57/google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb", size = 18926, upload-time = "2023-02-07T20:53:18.837Z" },
 ]

+[[package]]
+name = "google-cloud-aiplatform"
+version = "1.133.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "docstring-parser" },
+    { name = "google-api-core", extra = ["grpc"] },
+    { name = "google-auth" },
+    { name = "google-cloud-bigquery" },
+    { name = "google-cloud-resource-manager" },
+    { name = "google-cloud-storage" },
+    { name = "google-genai" },
+    { name = "packaging" },
+    { name = "proto-plus" },
+    { name = "protobuf" },
+    { name = "pydantic" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d4/be/31ce7fd658ddebafbe5583977ddee536b2bacc491ad10b5a067388aec66f/google_cloud_aiplatform-1.133.0.tar.gz", hash = "sha256:3a6540711956dd178daaab3c2c05db476e46d94ac25912b8cf4f59b00b058ae0", size = 9921309, upload-time = "2026-01-08T22:11:25.079Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/01/5b/ef74ff65aebb74eaba51078e33ddd897247ba0d1197fd5a7953126205519/google_cloud_aiplatform-1.133.0-py2.py3-none-any.whl", hash = "sha256:dfc81228e987ca10d1c32c7204e2131b3c8d6b7c8e0b4e23bf7c56816bc4c566", size = 8184595, upload-time = "2026-01-08T22:11:22.067Z" },
+]
+
+[[package]]
+name = "google-cloud-bigquery"
+version = "3.41.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-api-core", extra = ["grpc"] },
+    { name = "google-auth" },
+    { name = "google-cloud-core" },
+    { name = "google-resumable-media" },
+    { name = "packaging" },
+    { name = "python-dateutil" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ce/13/6515c7aab55a4a0cf708ffd309fb9af5bab54c13e32dc22c5acd6497193c/google_cloud_bigquery-3.41.0.tar.gz", hash = "sha256:2217e488b47ed576360c9b2cc07d59d883a54b83167c0ef37f915c26b01a06fe", size = 513434, upload-time = "2026-03-30T22:50:55.347Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/40/33/1d3902efadef9194566d499d61507e1f038454e0b55499d2d7f8ab2a4fee/google_cloud_bigquery-3.41.0-py3-none-any.whl", hash = "sha256:2a5b5a737b401cbd824a6e5eac7554100b878668d908e6548836b5d8aaa4dcaa", size = 262343, upload-time = "2026-03-30T22:48:45.444Z" },
+]
+
+[[package]]
+name = "google-cloud-core"
+version = "2.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-api-core" },
+    { name = "google-auth" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/dc/24/6ca08b0a03c7b0c620427503ab00353a4ae806b848b93bcea18b6b76fde6/google_cloud_core-2.5.1.tar.gz", hash = "sha256:3dc94bdec9d05a31d9f355045ed0f369fbc0d8c665076c734f065d729800f811", size = 36078, upload-time = "2026-03-30T22:50:08.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/73/d9/5bb050cb32826466aa9b25f79e2ca2879fe66cb76782d4ed798dd7506151/google_cloud_core-2.5.1-py3-none-any.whl", hash = "sha256:ea62cdf502c20e3e14be8a32c05ed02113d7bef454e40ff3fab6fe1ec9f1f4e7", size = 29452, upload-time = "2026-03-30T22:48:31.567Z" },
+]
+
+[[package]]
+name = "google-cloud-resource-manager"
+version = "1.17.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-api-core", extra = ["grpc"] },
+    { name = "google-auth" },
+    { name = "grpc-google-iam-v1" },
+    { name = "grpcio" },
+    { name = "proto-plus" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b2/1a/13060cabf553d52d151d2afc26b39561e82853380d499dd525a0d422d9f0/google_cloud_resource_manager-1.17.0.tar.gz", hash = "sha256:0f486b62e2c58ff992a3a50fa0f4a96eef7750aa6c971bb373398ccb91828660", size = 464971, upload-time = "2026-03-26T22:17:29.204Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/f7/661d7a9023e877a226b5683429c3662f75a29ef45cb1464cf39adb689218/google_cloud_resource_manager-1.17.0-py3-none-any.whl", hash = "sha256:e479baf4b014a57f298e01b8279e3290b032e3476d69c8e5e1427af8f82739a5", size = 404403, upload-time = "2026-03-26T22:15:26.57Z" },
+]
+
+[[package]]
+name = "google-cloud-storage"
+version = "3.10.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-api-core" },
+    { name = "google-auth" },
+    { name = "google-cloud-core" },
+    { name = "google-crc32c" },
+    { name = "google-resumable-media" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4c/47/205eb8e9a1739b5345843e5a425775cbdc472cc38e7eda082ba5b8d02450/google_cloud_storage-3.10.1.tar.gz", hash = "sha256:97db9aa4460727982040edd2bd13ff3d5e2260b5331ad22895802da1fc2a5286", size = 17309950, upload-time = "2026-03-23T09:35:23.409Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ad/ff/ca9ab2417fa913d75aae38bf40bf856bb2749a604b2e0f701b37cfcd23cc/google_cloud_storage-3.10.1-py3-none-any.whl", hash = "sha256:a72f656759b7b99bda700f901adcb3425a828d4a29f911bc26b3ea79c5b1217f", size = 324453, upload-time = "2026-03-23T09:35:21.368Z" },
+]
+
+[[package]]
+name = "google-crc32c"
+version = "1.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/ef/21ccfaab3d5078d41efe8612e0ed0bfc9ce22475de074162a91a25f7980d/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8", size = 31298, upload-time = "2025-12-16T00:20:32.241Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/b8/f8413d3f4b676136e965e764ceedec904fe38ae8de0cdc52a12d8eb1096e/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7", size = 30872, upload-time = "2025-12-16T00:33:58.785Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/fd/33aa4ec62b290477181c55bb1c9302c9698c58c0ce9a6ab4874abc8b0d60/google_crc32c-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15", size = 33243, upload-time = "2025-12-16T00:40:21.46Z" },
+    { url = "https://files.pythonhosted.org/packages/71/03/4820b3bd99c9653d1a5210cb32f9ba4da9681619b4d35b6a052432df4773/google_crc32c-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a", size = 33608, upload-time = "2025-12-16T00:40:22.204Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/43/acf61476a11437bf9733fb2f70599b1ced11ec7ed9ea760fdd9a77d0c619/google_crc32c-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2", size = 34439, upload-time = "2025-12-16T00:35:20.458Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" },
+    { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" },
+    { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" },
+    { url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297, upload-time = "2025-12-16T00:23:20.709Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867, upload-time = "2025-12-16T00:43:14.628Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344, upload-time = "2025-12-16T00:40:24.742Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694, upload-time = "2025-12-16T00:40:25.505Z" },
+    { url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435, upload-time = "2025-12-16T00:35:22.107Z" },
+    { url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301, upload-time = "2025-12-16T00:24:48.527Z" },
+    { url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868, upload-time = "2025-12-16T00:48:12.163Z" },
+    { url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381, upload-time = "2025-12-16T00:40:26.268Z" },
+    { url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734, upload-time = "2025-12-16T00:40:27.028Z" },
+    { url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878, upload-time = "2025-12-16T00:35:23.142Z" },
+    { url = "https://files.pythonhosted.org/packages/52/c5/c171e4d8c44fec1422d801a6d2e5d7ddabd733eeda505c79730ee9607f07/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93", size = 28615, upload-time = "2025-12-16T00:40:29.298Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/97/7d75fe37a7a6ed171a2cf17117177e7aab7e6e0d115858741b41e9dd4254/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c", size = 28800, upload-time = "2025-12-16T00:40:30.322Z" },
+]
+
 [[package]]
 name = "google-genai"
 version = "1.52.0"
@@ -2191,6 +2315,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
 ]

+[[package]]
+name = "google-resumable-media"
+version = "2.8.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "google-crc32c" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3f/d1/b1ea14b93b6b78f57fc580125de44e9f593ab88dd2460f1a8a8d18f74754/google_resumable_media-2.8.2.tar.gz", hash = "sha256:f3354a182ebd193ae3f42e3ef95e6c9b10f128320de23ac7637236713b1acd70", size = 2164510, upload-time = "2026-03-30T23:34:25.369Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5e/f8/50bfaf4658431ff9de45c5c3935af7ab01157a4903c603cd0eee6e78e087/google_resumable_media-2.8.2-py3-none-any.whl", hash = "sha256:82b6d8ccd11765268cdd2a2123f417ec806b8eef3000a9a38dfe3033da5fb220", size = 81511, upload-time = "2026-03-30T23:34:09.671Z" },
+]
+
 [[package]]
 name = "googleapis-common-protos"
 version = "1.72.0"
@@ -2203,6 +2339,11 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
 ]

+[package.optional-dependencies]
+grpc = [
+    { name = "grpcio" },
+]
+
 [[package]]
 name = "greenlet"
 version = "3.2.4"
@@ -2253,6 +2394,85 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
 ]

+[[package]]
+name = "grpc-google-iam-v1"
+version = "0.14.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "googleapis-common-protos", extra = ["grpc"] },
+    { name = "grpcio" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/44/4f/d098419ad0bfc06c9ce440575f05aa22d8973b6c276e86ac7890093d3c37/grpc_google_iam_v1-0.14.4.tar.gz", hash = "sha256:392b3796947ed6334e61171d9ab06bf7eb357f554e5fc7556ad7aab6d0e17038", size = 23706, upload-time = "2026-04-01T01:57:49.813Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/89/22/c2dd50c09bf679bd38173656cd4402d2511e563b33bc88f90009cf50613c/grpc_google_iam_v1-0.14.4-py3-none-any.whl", hash = "sha256:412facc320fcbd94034b4df3d557662051d4d8adfa86e0ddb4dca70a3f739964", size = 32675, upload-time = "2026-04-01T01:57:47.69Z" },
+]
+
+[[package]]
+name = "grpcio"
+version = "1.80.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" },
+    { url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" },
+    { url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" },
+    { url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" },
+    { url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" },
+    { url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" },
+    { url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" },
+    { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" },
+    { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" },
+    { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" },
+    { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" },
+    { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" },
+    { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" },
+    { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" },
+    { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" },
+    { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" },
+    { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" },
+    { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" },
+]
+
+[[package]]
+name = "grpcio-status"
+version = "1.80.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "googleapis-common-protos" },
+    { name = "grpcio" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/ed/105f619bdd00cb47a49aa2feea6232ea2bbb04199d52a22cc6a7d603b5cb/grpcio_status-1.80.0.tar.gz", hash = "sha256:df73802a4c89a3ea88aa2aff971e886fccce162bc2e6511408b3d67a144381cd", size = 13901, upload-time = "2026-03-30T08:54:34.784Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/80/58cd2dfc19a07d022abe44bde7c365627f6c7cb6f692ada6c65ca437d09a/grpcio_status-1.80.0-py3-none-any.whl", hash = "sha256:4b56990363af50dbf2c2ebb80f1967185c07d87aa25aa2bea45ddb75fc181dbe", size = 14638, upload-time = "2026-03-30T08:54:01.569Z" },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"
@@ -3149,6 +3369,11 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e6/05/3516cc7386b220d388aa0bd833308c677e94eceb82b2756dd95e06f6a13f/litellm-1.81.6-py3-none-any.whl", hash = "sha256:573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a", size = 12224343, upload-time = "2026-02-01T04:02:23.704Z" },
 ]

+[package.optional-dependencies]
+google = [
+    { name = "google-cloud-aiplatform" },
+]
+
 [[package]]
 name = "locket"
 version = "1.0.0"
@@ -4217,7 +4442,7 @@ dependencies = [
    { name = "fastapi" },
    { name = "google-genai" },
    { name = "kubernetes" },
-    { name = "litellm" },
+    { name = "litellm", extra = ["google"] },
    { name = "openai" },
    { name = "prometheus-client" },
    { name = "prometheus-fastapi-instrumentator" },
@@ -4391,7 +4616,7 @@ requires-dist = [
    { name = "fastapi", specifier = "==0.133.1" },
    { name = "google-genai", specifier = "==1.52.0" },
    { name = "kubernetes", specifier = ">=31.0.0" },
-    { name = "litellm", specifier = "==1.81.6" },
+    { name = "litellm", extras = ["google"], specifier = "==1.81.6" },
    { name = "openai", specifier = "==2.14.0" },
    { name = "prometheus-client", specifier = ">=0.21.1" },
    { name = "prometheus-fastapi-instrumentator", specifier = "==7.1.0" },
--- a/web/Dockerfile
+++ b/web/Dockerfile
@@ -82,7 +82,10 @@ ARG NODE_OPTIONS
 # SENTRY_AUTH_TOKEN is injected via BuildKit secret mount so it is never written
 # to any image layer, build cache, or registry manifest.
 # Use NODE_OPTIONS in the build command
-RUN --mount=type=secret,id=sentry_auth_token,env=SENTRY_AUTH_TOKEN \
+RUN --mount=type=secret,id=sentry_auth_token \
+    if [ -f /run/secrets/sentry_auth_token ]; then \
+        export SENTRY_AUTH_TOKEN="$(cat /run/secrets/sentry_auth_token)"; \
+    fi && \
    NODE_OPTIONS="${NODE_OPTIONS}" npx next build

 # Step 2. Production image, copy all the files and run next
--- a/web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
+++ b/web/src/app/craft/v1/configure/components/UserLibraryModal.tsx
@@ -271,6 +271,22 @@ export default function UserLibraryModal({
                  />
                </Section>

+                {/* The exact cap is controlled by the backend env var
+                    MAX_EMBEDDED_IMAGES_PER_FILE (default 500). This copy is
+                    deliberately vague so it doesn't drift if the limit is
+                    tuned per-deployment; the precise number is surfaced in
+                    the rejection error the server returns. */}
+                <Section
+                  flexDirection="row"
+                  justifyContent="end"
+                  padding={0.5}
+                  height="fit"
+                >
+                  <Text secondaryBody text03>
+                    PDFs with many embedded images may be rejected.
+                  </Text>
+                </Section>
+
                {isLoading ? (
                  <Section padding={2} height="fit">
                    <Text secondaryBody text03>
--- a/web/src/components/llm/LLMSelector.tsx
+++ b/web/src/components/llm/LLMSelector.tsx
@@ -12,9 +12,9 @@ interface LLMOption {
  value: string;
  icon: ReturnType<typeof getModelIcon>;
  modelName: string;
+  providerId: number;
  providerName: string;
  provider: string;
-  providerDisplayName: string;
  supportsImageInput: boolean;
  vendor: string | null;
 }
@@ -64,7 +64,7 @@ export default function LLMSelector({
          return;
        }

-        const key = `${provider.provider}:${modelConfiguration.name}`;
+        const key = `${provider.id}:${modelConfiguration.name}`;
        if (seenKeys.has(key)) {
          return; // Skip exact duplicate
        }
@@ -87,10 +87,9 @@ export default function LLMSelector({
          ),
          icon: getModelIcon(provider.provider, modelConfiguration.name),
          modelName: modelConfiguration.name,
+          providerId: provider.id,
          providerName: provider.name,
          provider: provider.provider,
-          providerDisplayName:
-            provider.provider_display_name || provider.provider,
          supportsImageInput,
          vendor: modelConfiguration.vendor || null,
        };
@@ -108,33 +107,34 @@ export default function LLMSelector({
    requiresImageGeneration,
  ]);

-  // Group options by provider using backend-provided display names
+  // Group options by configured provider instance so multiple instances of the
+  // same provider type (e.g., two Anthropic API keys) appear as separate groups
+  // labeled with their user-given names.
  const groupedOptions = useMemo(() => {
    const groups = new Map<
-      string,
+      number,
      { displayName: string; options: LLMOption[] }
    >();

    llmOptions.forEach((option) => {
-      const provider = option.provider.toLowerCase();
-      if (!groups.has(provider)) {
-        groups.set(provider, {
-          displayName: option.providerDisplayName,
+      if (!groups.has(option.providerId)) {
+        groups.set(option.providerId, {
+          displayName: option.providerName,
          options: [],
        });
      }
-      groups.get(provider)!.options.push(option);
+      groups.get(option.providerId)!.options.push(option);
    });

    // Sort groups alphabetically by display name
-    const sortedProviders = Array.from(groups.keys()).sort((a, b) =>
+    const sortedProviderIds = Array.from(groups.keys()).sort((a, b) =>
      groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)
    );

-    return sortedProviders.map((provider) => {
-      const group = groups.get(provider)!;
+    return sortedProviderIds.map((providerId) => {
+      const group = groups.get(providerId)!;
      return {
-        provider,
+        providerId,
        displayName: group.displayName,
        options: group.options,
      };
@@ -179,7 +179,7 @@ export default function LLMSelector({
        )}
        {showGrouped
          ? groupedOptions.map((group) => (
-              <InputSelect.Group key={group.provider}>
+              <InputSelect.Group key={group.providerId}>
                <InputSelect.Label>{group.displayName}</InputSelect.Label>
                {group.options.map((option) => (
                  <InputSelect.Item
--- a/web/src/refresh-components/buttons/LineItem.tsx
+++ b/web/src/refresh-components/buttons/LineItem.tsx
@@ -82,6 +82,7 @@ export interface LineItemProps

  selected?: boolean;
  icon?: React.FunctionComponent<IconProps>;
+  strokeIcon?: boolean;
  description?: string;
  rightChildren?: React.ReactNode;
  href?: string;
@@ -154,6 +155,7 @@ export default function LineItem({
  skeleton,
  emphasized,
  icon: Icon,
+  strokeIcon = true,
  description,
  children,
  rightChildren,
@@ -245,7 +247,12 @@ export default function LineItem({
            !!(children && description) && "mt-0.5"
          )}
        >
-          <Icon className={cn("h-[1rem] w-[1rem]", iconClassNames[variant])} />
+          <Icon
+            className={cn(
+              "h-[1rem] w-[1rem]",
+              strokeIcon && iconClassNames[variant]
+            )}
+          />
        </div>
      )}
      <Section alignItems="start" gap={0}>
--- a/web/src/refresh-components/popovers/ActionsPopover/SwitchList.tsx
+++ b/web/src/refresh-components/popovers/ActionsPopover/SwitchList.tsx
@@ -103,6 +103,7 @@ export default function SwitchList({
                        item.leading) as React.FunctionComponent<IconProps>)
                    : undefined
                }
+                strokeIcon={false}
                rightChildren={
                  <Switch
                    checked={item.isEnabled}
--- a/web/src/refresh-components/popovers/ModelListContent.tsx
+++ b/web/src/refresh-components/popovers/ModelListContent.tsx
@@ -172,6 +172,7 @@ export default function ModelListContent({
                        <LineItem
                          muted
                          icon={group.Icon}
+                          strokeIcon={false}
                          rightChildren={
                            open ? (
                              <SvgChevronDown className="h-4 w-4 stroke-text-04 shrink-0" />
--- a/web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/index.tsx
+++ b/web/src/refresh-pages/admin/GroupsPage/SharedGroupResources/index.tsx
@@ -146,6 +146,7 @@ function SharedGroupResources({
              interactive={!dimmed}
              muted={dimmed}
              icon={getSourceMetadata(p.connector.source).icon}
+              strokeIcon={false}
              rightChildren={
                p.groups.length > 0 || dimmed ? <SharedBadge /> : undefined
              }
--- a/web/src/refresh-pages/admin/UsersPage/UserFilters.tsx
+++ b/web/src/refresh-pages/admin/UsersPage/UserFilters.tsx
@@ -186,6 +186,7 @@ export default function UserFilters({
                <LineItem
                  key={role}
                  icon={isSelected ? SvgCheck : roleIcon}
+                  strokeIcon={isSelected || role !== UserRole.SLACK_USER}
                  selected={isSelected}
                  emphasized={isSelected}
                  onClick={() => toggleRole(role)}
--- a/web/src/sections/knowledge/AgentKnowledgePane.tsx
+++ b/web/src/sections/knowledge/AgentKnowledgePane.tsx
@@ -131,6 +131,7 @@ function KnowledgeSidebar({
              <LineItem
                key={connectedSource.source}
                icon={sourceMetadata.icon}
+                strokeIcon={false}
                onClick={() => onNavigateToSource(connectedSource.source)}
                selected={isActive}
                emphasized={isActive || isSelected || selectionCount > 0}
@@ -720,6 +721,7 @@ const KnowledgeAddView = memo(function KnowledgeAddView({
              <LineItem
                key={connectedSource.source}
                icon={sourceMetadata.icon}
+                strokeIcon={false}
                onClick={() => onNavigateToSource(connectedSource.source)}
                emphasized={isSelected || selectionCount > 0}
                aria-label={`knowledge-add-source-${connectedSource.source}`}
Author	SHA1	Message	Date
Nikolas Garza	bf173654aa	feat(connectors): convert Gong connector from poll to checkpointed (#10258 ) to release v3.2 (#10359 )	2026-04-20 09:40:52 -07:00
Justin Tahara	de0575352b	fix(metrics): Adding in hostname (#10335 )	2026-04-17 13:39:48 -07:00
github-actions[bot]	d9185bcd2d	fix(deps): install transitive vertexai dependency (#10328 ) to release v3.2 (#10332 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-04-17 12:17:25 -07:00
Justin Tahara	5c36d7bcd6	fix(web): Sentry Token Check (#10310 )	2026-04-17 09:02:48 -07:00
github-actions[bot]	f81dc07afb	fix(fe): LineItem can disable icon stroke (#10289 ) to release v3.2 (#10315 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-04-17 08:27:31 -07:00
Nikolas Garza	1215ef4576	chore(hotfix): cherry-pick 3 commits to release v3.2 (#10309 ) Co-authored-by: Bo-Onyx <bo@onyx.app> Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>	2026-04-16 21:55:17 -07:00
Nikolas Garza	63d6f01895	chore(hotfix): cherry-pick 5 commits to release v3.2 (#10306 ) Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>	2026-04-16 21:54:41 -07:00
Justin Tahara	8fc2b3c3de	fix(image): Cap Uploaded File Image Count (#10298 )	2026-04-16 21:29:30 -07:00
github-actions[bot]	f5c48887f1	fix(llm-selector): show each provider instance as its own group (#10292 ) to release v3.2 (#10296 ) Co-authored-by: Jamison Lahman <jamison@lahman.dev> Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-16 17:26:04 -07:00
github-actions[bot]	fe363bb62b	fix: gmail datetime parsing on unexpected values (#10290 ) to release v3.2 (#10294 ) Co-authored-by: Wenxi <wenxi@onyx.app>	2026-04-16 17:03:47 -07:00