mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-20 17:06:43 +00:00
Compare commits
10 Commits
v3.2.5
...
release/v3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf173654aa | ||
|
|
de0575352b | ||
|
|
d9185bcd2d | ||
|
|
5c36d7bcd6 | ||
|
|
f81dc07afb | ||
|
|
1215ef4576 | ||
|
|
63d6f01895 | ||
|
|
8fc2b3c3de | ||
|
|
f5c48887f1 | ||
|
|
fe363bb62b |
@@ -10,6 +10,7 @@ from celery import bootsteps # type: ignore
|
||||
from celery import Task
|
||||
from celery.app import trace
|
||||
from celery.exceptions import WorkerShutdown
|
||||
from celery.signals import before_task_publish
|
||||
from celery.signals import task_postrun
|
||||
from celery.signals import task_prerun
|
||||
from celery.states import READY_STATES
|
||||
@@ -94,6 +95,17 @@ class TenantAwareTask(Task):
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.set(None)
|
||||
|
||||
|
||||
@before_task_publish.connect
def on_before_task_publish(
    headers: dict[str, Any] | None = None,
    **kwargs: Any,  # noqa: ARG001
) -> None:
    """Record the publish time in the outgoing task message headers.

    Connected to Celery's ``before_task_publish`` signal. The current
    wall-clock time is stored under the ``enqueued_at`` header so that a
    worker can later compute the queue wait time (the gap between publish
    and execution).
    """
    if headers is None:
        # No headers mapping was supplied with the signal; nothing to stamp.
        return
    headers["enqueued_at"] = time.time()
|
||||
|
||||
|
||||
@task_prerun.connect
|
||||
def on_task_prerun(
|
||||
sender: Any | None = None, # noqa: ARG001
|
||||
|
||||
@@ -16,6 +16,12 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
|
||||
from onyx.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME
|
||||
from onyx.db.engine.sql_engine import SqlEngine
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
|
||||
from onyx.server.metrics.metrics_server import start_metrics_server
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
@@ -36,6 +42,7 @@ def on_task_prerun(
|
||||
**kwds: Any,
|
||||
) -> None:
|
||||
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
|
||||
on_celery_task_prerun(task_id, task)
|
||||
|
||||
|
||||
@signals.task_postrun.connect
|
||||
@@ -50,6 +57,31 @@ def on_task_postrun(
|
||||
**kwds: Any,
|
||||
) -> None:
|
||||
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
|
||||
on_celery_task_postrun(task_id, task, state)
|
||||
|
||||
|
||||
@signals.task_retry.connect
def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    """Forward Celery's ``task_retry`` signal to the task metrics hook.

    The task id is read defensively from ``sender.request.id``; it is passed
    as ``None`` when the sender has no request or the request has no id.
    """
    current_request = getattr(sender, "request", None)
    on_celery_task_retry(getattr(current_request, "id", None), sender)
|
||||
|
||||
|
||||
@signals.task_revoked.connect
def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
    """Forward Celery's ``task_revoked`` signal to the task metrics hook.

    Args:
        sender: The revoked task object (its ``name`` is used when present,
            otherwise ``str(sender)``).
        **kwargs: Signal arguments. Celery dispatches ``task_revoked`` with
            ``request``, ``terminated``, ``signum`` and ``expired``.
    """
    task_name = getattr(sender, "name", None) or str(sender)

    # Celery does not send a ``task_id`` kwarg with this signal; the id lives
    # on the ``request`` context. Keep honoring an explicit ``task_id`` for
    # backward compatibility, then fall back to ``request.id``.
    task_id = kwargs.get("task_id")
    if task_id is None:
        task_id = getattr(kwargs.get("request"), "id", None)

    on_celery_task_revoked(task_id, task_name)
|
||||
|
||||
|
||||
@signals.task_rejected.connect
def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    """Forward Celery's ``task_rejected`` signal to the task metrics hook.

    The task name is taken from the ``task`` header of the rejected message.
    When the message, its headers, or that header is missing, the name
    ``"unknown"`` is reported instead. No task id is passed to the hook.
    """
    # getattr on a missing (None) message yields None, which collapses to {}.
    message_headers = getattr(kwargs.get("message"), "headers", None) or {}
    header_task_name = message_headers.get("task")
    on_celery_task_rejected(
        None, "unknown" if header_task_name is None else header_task_name
    )
|
||||
|
||||
|
||||
@celeryd_init.connect
|
||||
@@ -90,6 +122,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    """Handle Celery's ``worker_ready`` signal for this worker app.

    Calls ``start_metrics_server`` with the worker type ``"light"`` and then
    delegates to the shared ``app_base.on_worker_ready`` handler.
    """
    # NOTE(review): the metrics server is started before the shared handler
    # runs; presumably the order is intentional — confirm before reordering.
    start_metrics_server("light")
    app_base.on_worker_ready(sender, **kwargs)
|
||||
|
||||
|
||||
|
||||
@@ -38,6 +38,12 @@ from onyx.redis.redis_connector_stop import RedisConnectorStop
|
||||
from onyx.redis.redis_document_set import RedisDocumentSet
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_usergroup import RedisUserGroup
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
|
||||
from onyx.server.metrics.metrics_server import start_metrics_server
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
|
||||
@@ -59,6 +65,7 @@ def on_task_prerun(
|
||||
**kwds: Any,
|
||||
) -> None:
|
||||
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
|
||||
on_celery_task_prerun(task_id, task)
|
||||
|
||||
|
||||
@signals.task_postrun.connect
|
||||
@@ -73,6 +80,31 @@ def on_task_postrun(
|
||||
**kwds: Any,
|
||||
) -> None:
|
||||
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
|
||||
on_celery_task_postrun(task_id, task, state)
|
||||
|
||||
|
||||
@signals.task_retry.connect
def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    """Forward Celery's ``task_retry`` signal to the task metrics hook.

    The task id is read defensively from ``sender.request.id``; it is passed
    as ``None`` when the sender has no request or the request has no id.
    """
    current_request = getattr(sender, "request", None)
    on_celery_task_retry(getattr(current_request, "id", None), sender)
|
||||
|
||||
|
||||
@signals.task_revoked.connect
def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
    """Forward Celery's ``task_revoked`` signal to the task metrics hook.

    Args:
        sender: The revoked task object (its ``name`` is used when present,
            otherwise ``str(sender)``).
        **kwargs: Signal arguments. Celery dispatches ``task_revoked`` with
            ``request``, ``terminated``, ``signum`` and ``expired``.
    """
    task_name = getattr(sender, "name", None) or str(sender)

    # Celery does not send a ``task_id`` kwarg with this signal; the id lives
    # on the ``request`` context. Keep honoring an explicit ``task_id`` for
    # backward compatibility, then fall back to ``request.id``.
    task_id = kwargs.get("task_id")
    if task_id is None:
        task_id = getattr(kwargs.get("request"), "id", None)

    on_celery_task_revoked(task_id, task_name)
|
||||
|
||||
|
||||
@signals.task_rejected.connect
def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    """Forward Celery's ``task_rejected`` signal to the task metrics hook.

    The task name is taken from the ``task`` header of the rejected message.
    When the message, its headers, or that header is missing, the name
    ``"unknown"`` is reported instead. No task id is passed to the hook.
    """
    # getattr on a missing (None) message yields None, which collapses to {}.
    message_headers = getattr(kwargs.get("message"), "headers", None) or {}
    header_task_name = message_headers.get("task")
    on_celery_task_rejected(
        None, "unknown" if header_task_name is None else header_task_name
    )
|
||||
|
||||
|
||||
@celeryd_init.connect
|
||||
@@ -212,6 +244,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    """Handle Celery's ``worker_ready`` signal for this worker app.

    Calls ``start_metrics_server`` with the worker type ``"primary"`` and
    then delegates to the shared ``app_base.on_worker_ready`` handler.
    """
    # NOTE(review): the metrics server is started before the shared handler
    # runs; presumably the order is intentional — confirm before reordering.
    start_metrics_server("primary")
    app_base.on_worker_ready(sender, **kwargs)
|
||||
|
||||
|
||||
|
||||
@@ -59,6 +59,11 @@ from onyx.redis.redis_connector_delete import RedisConnectorDelete
|
||||
from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_pool import get_redis_replica_client
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_blocked
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_completed
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_fence_reset
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_started
|
||||
from onyx.server.metrics.deletion_metrics import observe_deletion_taskset_duration
|
||||
from onyx.utils.variable_functionality import (
|
||||
fetch_versioned_implementation_with_fallback,
|
||||
)
|
||||
@@ -300,6 +305,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
|
||||
recent_index_attempts
|
||||
and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS
|
||||
):
|
||||
inc_deletion_blocked(tenant_id, "indexing")
|
||||
raise TaskDependencyError(
|
||||
"Connector deletion - Delayed (indexing in progress): "
|
||||
f"cc_pair={cc_pair_id} "
|
||||
@@ -307,11 +313,13 @@ def try_generate_document_cc_pair_cleanup_tasks(
|
||||
)
|
||||
|
||||
if redis_connector.prune.fenced:
|
||||
inc_deletion_blocked(tenant_id, "pruning")
|
||||
raise TaskDependencyError(
|
||||
f"Connector deletion - Delayed (pruning in progress): cc_pair={cc_pair_id}"
|
||||
)
|
||||
|
||||
if redis_connector.permissions.fenced:
|
||||
inc_deletion_blocked(tenant_id, "permissions")
|
||||
raise TaskDependencyError(
|
||||
f"Connector deletion - Delayed (permissions in progress): cc_pair={cc_pair_id}"
|
||||
)
|
||||
@@ -359,6 +367,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
|
||||
# set this only after all tasks have been added
|
||||
fence_payload.num_tasks = tasks_generated
|
||||
redis_connector.delete.set_fence(fence_payload)
|
||||
inc_deletion_started(tenant_id)
|
||||
|
||||
return tasks_generated
|
||||
|
||||
@@ -523,6 +532,12 @@ def monitor_connector_deletion_taskset(
|
||||
num_docs_synced=fence_data.num_tasks,
|
||||
)
|
||||
|
||||
duration = (
|
||||
datetime.now(timezone.utc) - fence_data.submitted
|
||||
).total_seconds()
|
||||
observe_deletion_taskset_duration(tenant_id, "success", duration)
|
||||
inc_deletion_completed(tenant_id, "success")
|
||||
|
||||
except Exception as e:
|
||||
db_session.rollback()
|
||||
stack_trace = traceback.format_exc()
|
||||
@@ -541,6 +556,11 @@ def monitor_connector_deletion_taskset(
|
||||
f"Connector deletion exceptioned: "
|
||||
f"cc_pair={cc_pair_id} connector={connector_id_to_delete} credential={credential_id_to_delete}"
|
||||
)
|
||||
duration = (
|
||||
datetime.now(timezone.utc) - fence_data.submitted
|
||||
).total_seconds()
|
||||
observe_deletion_taskset_duration(tenant_id, "failure", duration)
|
||||
inc_deletion_completed(tenant_id, "failure")
|
||||
raise e
|
||||
|
||||
task_logger.info(
|
||||
@@ -717,5 +737,6 @@ def validate_connector_deletion_fence(
|
||||
f"fence={fence_key}"
|
||||
)
|
||||
|
||||
inc_deletion_fence_reset(tenant_id)
|
||||
redis_connector.delete.reset()
|
||||
return
|
||||
|
||||
@@ -34,6 +34,7 @@ from onyx.db.index_attempt import mark_attempt_canceled
|
||||
from onyx.db.index_attempt import mark_attempt_failed
|
||||
from onyx.db.indexing_coordination import IndexingCoordination
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import global_version
|
||||
from shared_configs.configs import SENTRY_DSN
|
||||
@@ -467,6 +468,15 @@ def docfetching_proxy_task(
|
||||
index_attempt.connector_credential_pair.connector.source.value
|
||||
)
|
||||
|
||||
cc_pair = index_attempt.connector_credential_pair
|
||||
on_index_attempt_status_change(
|
||||
tenant_id=tenant_id,
|
||||
source=result.connector_source,
|
||||
cc_pair_id=cc_pair_id,
|
||||
connector_name=cc_pair.connector.name or f"cc_pair_{cc_pair_id}",
|
||||
status="in_progress",
|
||||
)
|
||||
|
||||
while True:
|
||||
sleep(5)
|
||||
|
||||
|
||||
@@ -105,6 +105,9 @@ from onyx.redis.redis_pool import get_redis_replica_client
|
||||
from onyx.redis.redis_pool import redis_lock_dump
|
||||
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
|
||||
from onyx.redis.redis_utils import is_fence
|
||||
from onyx.server.metrics.connector_health_metrics import on_connector_error_state_change
|
||||
from onyx.server.metrics.connector_health_metrics import on_connector_indexing_success
|
||||
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
|
||||
from onyx.server.runtime.onyx_runtime import OnyxRuntime
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.middleware import make_randomized_onyx_request_id
|
||||
@@ -400,7 +403,6 @@ def check_indexing_completion(
|
||||
tenant_id: str,
|
||||
task: Task,
|
||||
) -> None:
|
||||
|
||||
logger.info(
|
||||
f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
|
||||
)
|
||||
@@ -521,13 +523,25 @@ def check_indexing_completion(
|
||||
|
||||
# Update CC pair status if successful
|
||||
cc_pair = get_connector_credential_pair_from_id(
|
||||
db_session, attempt.connector_credential_pair_id
|
||||
db_session,
|
||||
attempt.connector_credential_pair_id,
|
||||
eager_load_connector=True,
|
||||
)
|
||||
if cc_pair is None:
|
||||
raise RuntimeError(
|
||||
f"CC pair {attempt.connector_credential_pair_id} not found in database"
|
||||
)
|
||||
|
||||
source = cc_pair.connector.source.value
|
||||
connector_name = cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
|
||||
on_index_attempt_status_change(
|
||||
tenant_id=tenant_id,
|
||||
source=source,
|
||||
cc_pair_id=cc_pair.id,
|
||||
connector_name=connector_name,
|
||||
status=attempt.status.value,
|
||||
)
|
||||
|
||||
if attempt.status.is_successful():
|
||||
# NOTE: we define the last successful index time as the time the last successful
|
||||
# attempt finished. This is distinct from the poll_range_end of the last successful
|
||||
@@ -548,10 +562,26 @@ def check_indexing_completion(
|
||||
event=MilestoneRecordType.CONNECTOR_SUCCEEDED,
|
||||
)
|
||||
|
||||
on_connector_indexing_success(
|
||||
tenant_id=tenant_id,
|
||||
source=source,
|
||||
cc_pair_id=cc_pair.id,
|
||||
connector_name=connector_name,
|
||||
docs_indexed=attempt.new_docs_indexed or 0,
|
||||
success_timestamp=attempt.time_updated.timestamp(),
|
||||
)
|
||||
|
||||
# Clear repeated error state on success
|
||||
if cc_pair.in_repeated_error_state:
|
||||
cc_pair.in_repeated_error_state = False
|
||||
db_session.commit()
|
||||
on_connector_error_state_change(
|
||||
tenant_id=tenant_id,
|
||||
source=source,
|
||||
cc_pair_id=cc_pair.id,
|
||||
connector_name=connector_name,
|
||||
in_error=False,
|
||||
)
|
||||
|
||||
if attempt.status == IndexingStatus.SUCCESS:
|
||||
logger.info(
|
||||
@@ -848,6 +878,16 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
|
||||
cc_pair_id=cc_pair_id,
|
||||
in_repeated_error_state=True,
|
||||
)
|
||||
error_connector_name = (
|
||||
cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
|
||||
)
|
||||
on_connector_error_state_change(
|
||||
tenant_id=tenant_id,
|
||||
source=cc_pair.connector.source.value,
|
||||
cc_pair_id=cc_pair_id,
|
||||
connector_name=error_connector_name,
|
||||
in_error=True,
|
||||
)
|
||||
# When entering repeated error state, also pause the connector
|
||||
# to prevent continued indexing retry attempts burning through embedding credits.
|
||||
# NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
|
||||
|
||||
@@ -840,6 +840,29 @@ MAX_FILE_SIZE_BYTES = int(
|
||||
os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
|
||||
) # 2GB in bytes
|
||||
|
||||
# Upper bound on the number of embedded images in one file. A PDF (or other
# format) carrying thousands of embedded images can OOM the
# user-file-processing worker, since each image is decoded with PIL and then
# sent to the vision LLM. The limit is applied twice: at upload time (the file
# is rejected) and again during extraction as defense-in-depth (the number of
# images materialized is capped).
#
# The value is clamped to >= 0. A negative env value would make upload
# validation always fail and extraction always stop, which is never desired.
# 0 is a valid (if aggressive) setting that disables image extraction entirely.
MAX_EMBEDDED_IMAGES_PER_FILE = max(
    int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500), 0
)

# Upper bound on embedded images summed over every file in one upload batch.
# Guards against a user uploading many files that each stay under
# MAX_EMBEDDED_IMAGES_PER_FILE but together add up to enough work
# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
# worker under concurrency or cause surprise latency/cost. Also clamped
# to >= 0.
MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
    int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000), 0
)
|
||||
|
||||
# Use document summary for contextual rag
|
||||
USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
|
||||
# Use chunk summary for contextual rag
|
||||
|
||||
@@ -3,6 +3,7 @@ from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Any
|
||||
from typing import TypeVar
|
||||
from urllib.parse import urljoin
|
||||
@@ -10,7 +11,6 @@ from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from dateutil.parser import parse
|
||||
from dateutil.parser import ParserError
|
||||
|
||||
from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
@@ -56,18 +56,16 @@ def time_str_to_utc(datetime_str: str) -> datetime:
|
||||
if fixed not in candidates:
|
||||
candidates.append(fixed)
|
||||
|
||||
last_exception: Exception | None = None
|
||||
for candidate in candidates:
|
||||
try:
|
||||
dt = parse(candidate)
|
||||
return datetime_to_utc(dt)
|
||||
except (ValueError, ParserError) as exc:
|
||||
last_exception = exc
|
||||
# dateutil is the primary; the stdlib RFC 2822 parser is a fallback for
|
||||
# inputs dateutil rejects (e.g. headers concatenated without a CRLF —
|
||||
# TZ may be dropped, datetime_to_utc then assumes UTC).
|
||||
for parser in (parse, parsedate_to_datetime):
|
||||
for candidate in candidates:
|
||||
try:
|
||||
return datetime_to_utc(parser(candidate))
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
continue
|
||||
|
||||
if last_exception is not None:
|
||||
raise last_exception
|
||||
|
||||
# Fallback in case parsing failed without raising (should not happen)
|
||||
raise ValueError(f"Unable to parse datetime string: {datetime_str}")
|
||||
|
||||
|
||||
|
||||
@@ -253,7 +253,17 @@ def thread_to_document(
|
||||
|
||||
updated_at_datetime = None
|
||||
if updated_at:
|
||||
updated_at_datetime = time_str_to_utc(updated_at)
|
||||
try:
|
||||
updated_at_datetime = time_str_to_utc(updated_at)
|
||||
except (ValueError, OverflowError) as e:
|
||||
# Old mailboxes contain RFC-violating Date headers. Drop the
|
||||
# timestamp instead of aborting the indexing run.
|
||||
logger.warning(
|
||||
"Skipping unparseable Gmail Date header on thread %s: %r (%s)",
|
||||
full_thread.get("id"),
|
||||
updated_at,
|
||||
e,
|
||||
)
|
||||
|
||||
id = full_thread.get("id")
|
||||
if not id:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import copy
|
||||
import time
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
@@ -8,27 +9,58 @@ from typing import Any
|
||||
from typing import cast
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util import Retry
|
||||
|
||||
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
|
||||
from onyx.configs.app_configs import GONG_CONNECTOR_START_TIME
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import CheckpointedConnector
|
||||
from onyx.connectors.interfaces import CheckpointOutput
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.models import ConnectorCheckpoint
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import DocumentFailure
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class GongConnector(LoadConnector, PollConnector):
|
||||
class GongConnectorCheckpoint(ConnectorCheckpoint):
    """Resumable progress state for the Gong connector.

    Tracks which workspace is being processed, the pagination position inside
    that workspace, and the time range being fetched.
    """

    # Resolved workspace IDs to iterate through.
    # None means "not yet resolved" — the first checkpoint call resolves them.
    # An inner None entry means "no workspace filter" (fetch all).
    workspace_ids: list[str | None] | None = None
    # Index into workspace_ids of the workspace currently being processed.
    workspace_index: int = 0
    # Gong API cursor for the current workspace's transcript pagination.
    cursor: str | None = None
    # Cached time range — computed once and reused across checkpoint calls.
    # NOTE(review): presumably a (start, end) pair of datetime strings — confirm.
    time_range: tuple[str, str] | None = None
|
||||
|
||||
|
||||
class _TranscriptPage(BaseModel):
    """One page of transcripts from /v2/calls/transcript."""

    # Transcript records returned on this page (may be empty).
    transcripts: list[dict[str, Any]]
    # Cursor for requesting the following page; None when no cursor is given.
    next_cursor: str | None = None
|
||||
|
||||
|
||||
class _CursorExpiredError(Exception):
    """Raised when Gong rejects a pagination cursor as expired.

    A Gong pagination cursor's TTL is ~1 hour, measured from the first request
    in a pagination sequence rather than from the most recent cursor fetch.
    Because checkpointed connector runs can pause between invocations, a
    resumed run may encounter an expired cursor and must restart the current
    workspace from scratch.
    See https://visioneers.gong.io/integrations-77/pagination-cursor-expires-after-1-hours-even-for-a-new-cursor-1382
    """
|
||||
|
||||
|
||||
class GongConnector(CheckpointedConnector[GongConnectorCheckpoint]):
|
||||
BASE_URL = "https://api.gong.io"
|
||||
MAX_CALL_DETAILS_ATTEMPTS = 6
|
||||
CALL_DETAILS_DELAY = 30 # in seconds
|
||||
@@ -38,13 +70,9 @@ class GongConnector(LoadConnector, PollConnector):
|
||||
def __init__(
|
||||
self,
|
||||
workspaces: list[str] | None = None,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
continue_on_fail: bool = CONTINUE_ON_CONNECTOR_FAILURE,
|
||||
hide_user_info: bool = False,
|
||||
) -> None:
|
||||
self.workspaces = workspaces
|
||||
self.batch_size: int = batch_size
|
||||
self.continue_on_fail = continue_on_fail
|
||||
self.auth_token_basic: str | None = None
|
||||
self.hide_user_info = hide_user_info
|
||||
self._last_request_time: float = 0.0
|
||||
@@ -98,67 +126,50 @@ class GongConnector(LoadConnector, PollConnector):
|
||||
# Then the user input is treated as the name
|
||||
return {**id_id_map, **name_id_map}
|
||||
|
||||
def _get_transcript_batches(
|
||||
self, start_datetime: str | None = None, end_datetime: str | None = None
|
||||
) -> Generator[list[dict[str, Any]], None, None]:
|
||||
body: dict[str, dict] = {"filter": {}}
|
||||
def _fetch_transcript_page(
|
||||
self,
|
||||
start_datetime: str | None,
|
||||
end_datetime: str | None,
|
||||
workspace_id: str | None,
|
||||
cursor: str | None,
|
||||
) -> _TranscriptPage:
|
||||
"""Fetch one page of transcripts from the Gong API.
|
||||
|
||||
Raises _CursorExpiredError if Gong reports the pagination cursor
|
||||
expired (TTL is ~1 hour from first request in the pagination sequence).
|
||||
"""
|
||||
body: dict[str, Any] = {"filter": {}}
|
||||
if start_datetime:
|
||||
body["filter"]["fromDateTime"] = start_datetime
|
||||
if end_datetime:
|
||||
body["filter"]["toDateTime"] = end_datetime
|
||||
if workspace_id:
|
||||
body["filter"]["workspaceId"] = workspace_id
|
||||
if cursor:
|
||||
body["cursor"] = cursor
|
||||
|
||||
# The batch_ids in the previous method appears to be batches of call_ids to process
|
||||
# In this method, we will retrieve transcripts for them in batches.
|
||||
transcripts: list[dict[str, Any]] = []
|
||||
workspace_list = self.workspaces or [None] # type: ignore
|
||||
workspace_map = self._get_workspace_id_map() if self.workspaces else {}
|
||||
response = self._throttled_request(
|
||||
"POST", GongConnector.make_url("/v2/calls/transcript"), json=body
|
||||
)
|
||||
# If no calls in the range, return empty
|
||||
if response.status_code == 404:
|
||||
return _TranscriptPage(transcripts=[])
|
||||
|
||||
for workspace in workspace_list:
|
||||
if workspace:
|
||||
logger.info(f"Updating Gong workspace: {workspace}")
|
||||
workspace_id = workspace_map.get(workspace)
|
||||
if not workspace_id:
|
||||
logger.error(f"Invalid Gong workspace: {workspace}")
|
||||
if not self.continue_on_fail:
|
||||
raise ValueError(f"Invalid workspace: {workspace}")
|
||||
continue
|
||||
body["filter"]["workspaceId"] = workspace_id
|
||||
else:
|
||||
if "workspaceId" in body["filter"]:
|
||||
del body["filter"]["workspaceId"]
|
||||
if not response.ok:
|
||||
# Cursor expiration comes back as a 4xx with this error message —
|
||||
# detect it before raise_for_status so callers can restart the workspace.
|
||||
if cursor and "cursor has expired" in response.text.lower():
|
||||
raise _CursorExpiredError(response.text)
|
||||
logger.error(f"Error fetching transcripts: {response.text}")
|
||||
response.raise_for_status()
|
||||
|
||||
while True:
|
||||
response = self._throttled_request(
|
||||
"POST", GongConnector.make_url("/v2/calls/transcript"), json=body
|
||||
)
|
||||
# If no calls in the range, just break out
|
||||
if response.status_code == 404:
|
||||
break
|
||||
data = response.json()
|
||||
return _TranscriptPage(
|
||||
transcripts=data.get("callTranscripts", []),
|
||||
next_cursor=data.get("records", {}).get("cursor"),
|
||||
)
|
||||
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except Exception:
|
||||
logger.error(f"Error fetching transcripts: {response.text}")
|
||||
raise
|
||||
|
||||
data = response.json()
|
||||
call_transcripts = data.get("callTranscripts", [])
|
||||
transcripts.extend(call_transcripts)
|
||||
|
||||
while len(transcripts) >= self.batch_size:
|
||||
yield transcripts[: self.batch_size]
|
||||
transcripts = transcripts[self.batch_size :]
|
||||
|
||||
cursor = data.get("records", {}).get("cursor")
|
||||
if cursor:
|
||||
body["cursor"] = cursor
|
||||
else:
|
||||
break
|
||||
|
||||
if transcripts:
|
||||
yield transcripts
|
||||
|
||||
def _get_call_details_by_ids(self, call_ids: list[str]) -> dict:
|
||||
def _get_call_details_by_ids(self, call_ids: list[str]) -> dict[str, Any]:
|
||||
body = {
|
||||
"filter": {"callIds": call_ids},
|
||||
"contentSelector": {"exposedFields": {"parties": True}},
|
||||
@@ -176,6 +187,50 @@ class GongConnector(LoadConnector, PollConnector):
|
||||
|
||||
return call_to_metadata
|
||||
|
||||
def _fetch_call_details_with_retry(self, call_ids: list[str]) -> dict[str, Any]:
|
||||
"""Fetch call details with retry for the Gong API race condition.
|
||||
|
||||
The Gong API has a known race where transcript call IDs don't immediately
|
||||
appear in /v2/calls/extensive. Retries with exponential backoff, only
|
||||
re-requesting the missing IDs on each attempt.
|
||||
"""
|
||||
call_details_map = self._get_call_details_by_ids(call_ids)
|
||||
if set(call_ids) == set(call_details_map.keys()):
|
||||
return call_details_map
|
||||
|
||||
for attempt in range(2, self.MAX_CALL_DETAILS_ATTEMPTS + 1):
|
||||
missing_ids = list(set(call_ids) - set(call_details_map.keys()))
|
||||
logger.warning(
|
||||
f"_get_call_details_by_ids is missing call id's: current_attempt={attempt - 1} missing_call_ids={missing_ids}"
|
||||
)
|
||||
|
||||
wait_seconds = self.CALL_DETAILS_DELAY * pow(2, attempt - 2)
|
||||
logger.warning(
|
||||
f"_get_call_details_by_ids waiting to retry: "
|
||||
f"wait={wait_seconds}s "
|
||||
f"current_attempt={attempt - 1} "
|
||||
f"next_attempt={attempt} "
|
||||
f"max_attempts={self.MAX_CALL_DETAILS_ATTEMPTS}"
|
||||
)
|
||||
time.sleep(wait_seconds)
|
||||
|
||||
# Only re-fetch the missing IDs, merge into existing results
|
||||
new_details = self._get_call_details_by_ids(missing_ids)
|
||||
call_details_map.update(new_details)
|
||||
|
||||
if set(call_ids) == set(call_details_map.keys()):
|
||||
return call_details_map
|
||||
|
||||
missing_ids = list(set(call_ids) - set(call_details_map.keys()))
|
||||
logger.error(
|
||||
f"Giving up on missing call id's after "
|
||||
f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
|
||||
f"missing_call_ids={missing_ids} — "
|
||||
f"proceeding with {len(call_details_map)} of "
|
||||
f"{len(call_ids)} calls"
|
||||
)
|
||||
return call_details_map
|
||||
|
||||
@staticmethod
|
||||
def _parse_parties(parties: list[dict]) -> dict[str, str]:
|
||||
id_mapping = {}
|
||||
@@ -196,186 +251,46 @@ class GongConnector(LoadConnector, PollConnector):
|
||||
|
||||
return id_mapping
|
||||
|
||||
def _fetch_calls(
|
||||
self, start_datetime: str | None = None, end_datetime: str | None = None
|
||||
) -> GenerateDocumentsOutput:
|
||||
num_calls = 0
|
||||
def _resolve_workspace_ids(self) -> list[str | None]:
|
||||
"""Resolve configured workspace names/IDs to actual workspace IDs.
|
||||
|
||||
for transcript_batch in self._get_transcript_batches(
|
||||
start_datetime, end_datetime
|
||||
):
|
||||
doc_batch: list[Document | HierarchyNode] = []
|
||||
Returns a list of workspace IDs. If no workspaces are configured,
|
||||
returns [None] to indicate "fetch all workspaces".
|
||||
|
||||
transcript_call_ids = cast(
|
||||
list[str],
|
||||
[t.get("callId") for t in transcript_batch if t.get("callId")],
|
||||
Raises ValueError if workspaces are configured but none resolve —
|
||||
we never silently widen scope to "fetch all" on misconfiguration,
|
||||
because that could ingest an entire Gong account by mistake.
|
||||
"""
|
||||
if not self.workspaces:
|
||||
return [None]
|
||||
|
||||
workspace_map = self._get_workspace_id_map()
|
||||
resolved: list[str | None] = []
|
||||
for workspace in self.workspaces:
|
||||
workspace_id = workspace_map.get(workspace)
|
||||
if not workspace_id:
|
||||
logger.error(f"Invalid Gong workspace: {workspace}")
|
||||
continue
|
||||
resolved.append(workspace_id)
|
||||
|
||||
if not resolved:
|
||||
raise ValueError(
|
||||
f"No valid Gong workspaces found — check workspace names/IDs in connector config. Configured: {self.workspaces}"
|
||||
)
|
||||
|
||||
call_details_map: dict[str, Any] = {}
|
||||
return resolved
|
||||
|
||||
# There's a likely race condition in the API where a transcript will have a
|
||||
# call id but the call to v2/calls/extensive will not return all of the id's
|
||||
# retry with exponential backoff has been observed to mitigate this
|
||||
# in ~2 minutes. After max attempts, proceed with whatever we have —
|
||||
# the per-call loop below will skip missing IDs gracefully.
|
||||
current_attempt = 0
|
||||
while True:
|
||||
current_attempt += 1
|
||||
call_details_map = self._get_call_details_by_ids(transcript_call_ids)
|
||||
if set(transcript_call_ids) == set(call_details_map.keys()):
|
||||
# we got all the id's we were expecting ... break and continue
|
||||
break
|
||||
|
||||
# we are missing some id's. Log and retry with exponential backoff
|
||||
missing_call_ids = set(transcript_call_ids) - set(
|
||||
call_details_map.keys()
|
||||
)
|
||||
logger.warning(
|
||||
f"_get_call_details_by_ids is missing call id's: "
|
||||
f"current_attempt={current_attempt} "
|
||||
f"missing_call_ids={missing_call_ids}"
|
||||
)
|
||||
if current_attempt >= self.MAX_CALL_DETAILS_ATTEMPTS:
|
||||
logger.error(
|
||||
f"Giving up on missing call id's after "
|
||||
f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
|
||||
f"missing_call_ids={missing_call_ids} — "
|
||||
f"proceeding with {len(call_details_map)} of "
|
||||
f"{len(transcript_call_ids)} calls"
|
||||
)
|
||||
break
|
||||
|
||||
wait_seconds = self.CALL_DETAILS_DELAY * pow(2, current_attempt - 1)
|
||||
logger.warning(
|
||||
f"_get_call_details_by_ids waiting to retry: "
|
||||
f"wait={wait_seconds}s "
|
||||
f"current_attempt={current_attempt} "
|
||||
f"next_attempt={current_attempt + 1} "
|
||||
f"max_attempts={self.MAX_CALL_DETAILS_ATTEMPTS}"
|
||||
)
|
||||
time.sleep(wait_seconds)
|
||||
|
||||
# now we can iterate per call/transcript
|
||||
for transcript in transcript_batch:
|
||||
call_id = transcript.get("callId")
|
||||
|
||||
if not call_id or call_id not in call_details_map:
|
||||
# NOTE(rkuo): seeing odd behavior where call_ids from the transcript
|
||||
# don't have call details. adding error debugging logs to trace.
|
||||
logger.error(
|
||||
f"Couldn't get call information for Call ID: {call_id}"
|
||||
)
|
||||
if call_id:
|
||||
logger.error(
|
||||
f"Call debug info: call_id={call_id} "
|
||||
f"call_ids={transcript_call_ids} "
|
||||
f"call_details_map={call_details_map.keys()}"
|
||||
)
|
||||
if not self.continue_on_fail:
|
||||
raise RuntimeError(
|
||||
f"Couldn't get call information for Call ID: {call_id}"
|
||||
)
|
||||
continue
|
||||
|
||||
call_details = call_details_map[call_id]
|
||||
call_metadata = call_details["metaData"]
|
||||
|
||||
call_time_str = call_metadata["started"]
|
||||
call_title = call_metadata["title"]
|
||||
logger.info(
|
||||
f"{num_calls + 1}: Indexing Gong call id {call_id} from {call_time_str.split('T', 1)[0]}: {call_title}"
|
||||
)
|
||||
|
||||
call_parties = cast(list[dict] | None, call_details.get("parties"))
|
||||
if call_parties is None:
|
||||
logger.error(f"Couldn't get parties for Call ID: {call_id}")
|
||||
call_parties = []
|
||||
|
||||
id_to_name_map = self._parse_parties(call_parties)
|
||||
|
||||
# Keeping a separate dict here in case the parties info is incomplete
|
||||
speaker_to_name: dict[str, str] = {}
|
||||
|
||||
transcript_text = ""
|
||||
call_purpose = call_metadata["purpose"]
|
||||
if call_purpose:
|
||||
transcript_text += f"Call Description: {call_purpose}\n\n"
|
||||
|
||||
contents = transcript["transcript"]
|
||||
for segment in contents:
|
||||
speaker_id = segment.get("speakerId", "")
|
||||
if speaker_id not in speaker_to_name:
|
||||
if self.hide_user_info:
|
||||
speaker_to_name[speaker_id] = (
|
||||
f"User {len(speaker_to_name) + 1}"
|
||||
)
|
||||
else:
|
||||
speaker_to_name[speaker_id] = id_to_name_map.get(
|
||||
speaker_id, "Unknown"
|
||||
)
|
||||
|
||||
speaker_name = speaker_to_name[speaker_id]
|
||||
|
||||
sentences = segment.get("sentences", {})
|
||||
monolog = " ".join(
|
||||
[sentence.get("text", "") for sentence in sentences]
|
||||
)
|
||||
transcript_text += f"{speaker_name}: {monolog}\n\n"
|
||||
|
||||
metadata = {}
|
||||
if call_metadata.get("system"):
|
||||
metadata["client"] = call_metadata.get("system")
|
||||
# TODO calls have a clientUniqueId field, can pull that in later
|
||||
|
||||
doc_batch.append(
|
||||
Document(
|
||||
id=call_id,
|
||||
sections=[
|
||||
TextSection(link=call_metadata["url"], text=transcript_text)
|
||||
],
|
||||
source=DocumentSource.GONG,
|
||||
# Should not ever be Untitled as a call cannot be made without a Title
|
||||
semantic_identifier=call_title or "Untitled",
|
||||
doc_updated_at=datetime.fromisoformat(call_time_str).astimezone(
|
||||
timezone.utc
|
||||
),
|
||||
metadata={"client": call_metadata.get("system")},
|
||||
)
|
||||
)
|
||||
|
||||
num_calls += 1
|
||||
|
||||
yield doc_batch
|
||||
|
||||
logger.info(f"_fetch_calls finished: num_calls={num_calls}")
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
combined = (
|
||||
f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
|
||||
)
|
||||
self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
|
||||
"utf-8"
|
||||
)
|
||||
|
||||
if self.auth_token_basic is None:
|
||||
raise ConnectorMissingCredentialError("Gong")
|
||||
|
||||
self._session.headers.update(
|
||||
{"Authorization": f"Basic {self.auth_token_basic}"}
|
||||
)
|
||||
return None
|
||||
|
||||
def load_from_state(self) -> GenerateDocumentsOutput:
    """Run a full load by fetching calls without any start/end bound."""
    all_calls = self._fetch_calls()
    return all_calls
|
||||
|
||||
def poll_source(
|
||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||
) -> GenerateDocumentsOutput:
|
||||
@staticmethod
|
||||
def _compute_time_range(
|
||||
start: SecondsSinceUnixEpoch,
|
||||
end: SecondsSinceUnixEpoch,
|
||||
) -> tuple[str, str]:
|
||||
"""Compute the start/end datetime strings for the Gong API filter,
|
||||
applying GONG_CONNECTOR_START_TIME and the 1-day offset."""
|
||||
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
|
||||
|
||||
# if this env variable is set, don't start from a timestamp before the specified
|
||||
# start time
|
||||
# TODO: remove this once this is globally available
|
||||
if GONG_CONNECTOR_START_TIME:
|
||||
special_start_datetime = datetime.fromisoformat(GONG_CONNECTOR_START_TIME)
|
||||
special_start_datetime = special_start_datetime.replace(tzinfo=timezone.utc)
|
||||
@@ -394,11 +309,186 @@ class GongConnector(LoadConnector, PollConnector):
|
||||
# so adding a 1 day buffer and fetching by default till current time
|
||||
start_one_day_offset = start_datetime - timedelta(days=1)
|
||||
start_time = start_one_day_offset.isoformat()
|
||||
end_time = end_datetime.isoformat()
|
||||
|
||||
end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()
|
||||
return start_time, end_time
|
||||
|
||||
logger.info(f"Fetching Gong calls between {start_time} and {end_time}")
|
||||
return self._fetch_calls(start_time, end_time)
|
||||
def _process_transcripts(
    self,
    transcripts: list[dict[str, Any]],
) -> Generator[Document | ConnectorFailure, None, None]:
    """Convert a batch of Gong transcripts into Documents or ConnectorFailures.

    Call details for every transcript carrying a ``callId`` are fetched in
    one batch through ``_fetch_call_details_with_retry``. A transcript whose
    call details are unavailable produces a ``ConnectorFailure`` instead of
    aborting the batch.

    Args:
        transcripts: Transcript dicts. Each is read for ``callId`` and
            ``transcript`` (an iterable of segments with ``speakerId`` and
            ``sentences``).

    Yields:
        A ``Document`` for each transcript with call details, otherwise a
        ``ConnectorFailure`` describing the missing call.
    """
    transcript_call_ids = cast(
        list[str],
        [t.get("callId") for t in transcripts if t.get("callId")],
    )

    call_details_map = self._fetch_call_details_with_retry(transcript_call_ids)

    for transcript in transcripts:
        call_id = transcript.get("callId")

        if not call_id or call_id not in call_details_map:
            logger.error(f"Couldn't get call information for Call ID: {call_id}")
            if call_id:
                logger.error(
                    f"Call debug info: call_id={call_id} "
                    f"call_ids={transcript_call_ids} "
                    f"call_details_map={call_details_map.keys()}"
                )
            yield ConnectorFailure(
                failed_document=DocumentFailure(
                    document_id=call_id or "unknown",
                ),
                failure_message=f"Couldn't get call information for Call ID: {call_id}",
            )
            continue

        call_details = call_details_map[call_id]
        call_metadata = call_details["metaData"]

        call_time_str = call_metadata["started"]
        call_title = call_metadata["title"]
        logger.info(
            f"Indexing Gong call id {call_id} from {call_time_str.split('T', 1)[0]}: {call_title}"
        )

        call_parties = cast(list[dict] | None, call_details.get("parties"))
        if call_parties is None:
            logger.error(f"Couldn't get parties for Call ID: {call_id}")
            call_parties = []

        id_to_name_map = self._parse_parties(call_parties)

        # Kept separate from id_to_name_map in case the parties info is
        # incomplete; also drives the "User N" numbering when names are hidden.
        speaker_to_name: dict[str, str] = {}

        # Collect pieces and join once instead of growing a string with +=.
        text_parts: list[str] = []
        call_purpose = call_metadata.get("purpose")
        if call_purpose:
            text_parts.append(f"Call Description: {call_purpose}\n\n")

        for segment in transcript["transcript"]:
            speaker_id = segment.get("speakerId", "")
            if speaker_id not in speaker_to_name:
                if self.hide_user_info:
                    speaker_to_name[speaker_id] = f"User {len(speaker_to_name) + 1}"
                else:
                    speaker_to_name[speaker_id] = id_to_name_map.get(
                        speaker_id, "Unknown"
                    )

            speaker_name = speaker_to_name[speaker_id]

            sentences = segment.get("sentences") or []
            monolog = " ".join(sentence.get("text", "") for sentence in sentences)
            text_parts.append(f"{speaker_name}: {monolog}\n\n")

        transcript_text = "".join(text_parts)

        # Only record the client when Gong reports one, so the metadata never
        # carries a None value.
        metadata: dict[str, str | list[str]] = {}
        client = call_metadata.get("system")
        if client:
            metadata["client"] = client

        yield Document(
            id=call_id,
            sections=[TextSection(link=call_metadata["url"], text=transcript_text)],
            source=DocumentSource.GONG,
            # A call should always have a title; "Untitled" is a fallback.
            semantic_identifier=call_title or "Untitled",
            doc_updated_at=datetime.fromisoformat(call_time_str).astimezone(
                timezone.utc
            ),
            metadata=metadata,
        )
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
combined = (
|
||||
f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
|
||||
)
|
||||
self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
|
||||
"utf-8"
|
||||
)
|
||||
|
||||
if self.auth_token_basic is None:
|
||||
raise ConnectorMissingCredentialError("Gong")
|
||||
|
||||
self._session.headers.update(
|
||||
{"Authorization": f"Basic {self.auth_token_basic}"}
|
||||
)
|
||||
return None
|
||||
|
||||
def build_dummy_checkpoint(self) -> GongConnectorCheckpoint:
    """Return the starting checkpoint for a fresh run, flagged as having more work."""
    initial_checkpoint = GongConnectorCheckpoint(has_more=True)
    return initial_checkpoint
|
||||
|
||||
def validate_checkpoint_json(self, checkpoint_json: str) -> GongConnectorCheckpoint:
    """Parse and validate a JSON-serialized checkpoint into a GongConnectorCheckpoint."""
    parsed_checkpoint = GongConnectorCheckpoint.model_validate_json(checkpoint_json)
    return parsed_checkpoint
|
||||
|
||||
def load_from_checkpoint(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: GongConnectorCheckpoint,
) -> CheckpointOutput[GongConnectorCheckpoint]:
    """Advance indexing by one step and return the updated checkpoint.

    Each invocation does exactly one of the following:

    * first call (no workspace IDs stored yet): resolve workspace IDs and
      the time range, store both, and return without fetching anything;
    * all workspaces consumed: mark the checkpoint finished;
    * otherwise: fetch one transcript page for the current workspace, yield
      its Documents/ConnectorFailures, then move the cursor forward or
      advance to the next workspace.

    The incoming checkpoint is deep-copied, so the caller's object is never
    mutated.
    """
    state = copy.deepcopy(checkpoint)

    # First invocation: do the one-off setup and hand control back.
    if state.workspace_ids is None:
        state.workspace_ids = self._resolve_workspace_ids()
        state.time_range = self._compute_time_range(start, end)
        state.has_more = True
        return state

    all_workspace_ids = state.workspace_ids
    workspace_total = len(all_workspace_ids)

    # Nothing left to visit.
    if state.workspace_index >= workspace_total:
        state.has_more = False
        return state

    # Prefer the time range cached in the checkpoint; recompute otherwise.
    cached_range = state.time_range
    if cached_range:
        start_time, end_time = cached_range
    else:
        start_time, end_time = self._compute_time_range(start, end)
    logger.info(
        f"Fetching Gong calls between {start_time} and {end_time} "
        f"(workspace {state.workspace_index + 1}/{workspace_total})"
    )

    current_workspace_id = all_workspace_ids[state.workspace_index]

    # Fetch a single page of transcripts for the current workspace.
    try:
        page = self._fetch_transcript_page(
            start_datetime=start_time,
            end_datetime=end_time,
            workspace_id=current_workspace_id,
            cursor=state.cursor,
        )
    except _CursorExpiredError:
        # Gong cursors live roughly an hour from the first request of a
        # sequence. If the run paused past that, restart this workspace from
        # the start of the time range; documents are keyed by call_id, so
        # re-processing is safe.
        logger.warning(
            f"Gong pagination cursor expired for workspace "
            f"{state.workspace_index + 1}/{workspace_total}; "
            f"restarting workspace from beginning of time range."
        )
        state.cursor = None
        state.has_more = True
        return state

    # Turn the fetched transcripts into documents / failures.
    if page.transcripts:
        yield from self._process_transcripts(page.transcripts)

    # Record where the next invocation should pick up.
    if not page.next_cursor:
        # Current workspace is exhausted; move on to the next one.
        state.workspace_index += 1
        state.cursor = None
        state.has_more = state.workspace_index < workspace_total
    else:
        # More pages remain in this workspace.
        state.cursor = page.next_cursor
        state.has_more = True

    return state
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -412,5 +502,13 @@ if __name__ == "__main__":
|
||||
}
|
||||
)
|
||||
|
||||
latest_docs = connector.load_from_state()
|
||||
print(next(latest_docs))
|
||||
checkpoint = connector.build_dummy_checkpoint()
|
||||
while checkpoint.has_more:
|
||||
doc_generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
try:
|
||||
while True:
|
||||
item = next(doc_generator)
|
||||
print(item)
|
||||
except StopIteration as e:
|
||||
checkpoint = e.value
|
||||
print(f"Checkpoint: {checkpoint}")
|
||||
|
||||
@@ -750,31 +750,3 @@ def resync_cc_pair(
|
||||
)
|
||||
|
||||
db_session.commit()
|
||||
|
||||
|
||||
# ── Metrics query helpers ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_connector_health_for_metrics(
    db_session: Session,
) -> list:  # Returns list of Row tuples
    """Fetch per-connector health rows used for Prometheus metrics.

    Every row has the shape (cc_pair_id, status, in_repeated_error_state,
    last_successful_index_time, name, source), built by joining each
    connector-credential pair to its connector.
    """
    selected_columns = (
        ConnectorCredentialPair.id,
        ConnectorCredentialPair.status,
        ConnectorCredentialPair.in_repeated_error_state,
        ConnectorCredentialPair.last_successful_index_time,
        ConnectorCredentialPair.name,
        Connector.source,
    )
    health_query = db_session.query(*selected_columns).join(
        Connector,
        ConnectorCredentialPair.connector_id == Connector.id,
    )
    return health_query.all()
|
||||
|
||||
@@ -2,8 +2,6 @@ from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from typing import NamedTuple
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TypeVarTuple
|
||||
|
||||
from sqlalchemy import and_
|
||||
@@ -30,17 +28,6 @@ from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.telemetry import optional_telemetry
|
||||
from onyx.utils.telemetry import RecordType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from onyx.configs.constants import DocumentSource
|
||||
|
||||
# from sqlalchemy.sql.selectable import Select
|
||||
|
||||
# Comment out unused imports that cause mypy errors
|
||||
# from onyx.auth.models import UserRole
|
||||
# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS
|
||||
# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier
|
||||
# from onyx.db.engine import async_query_for_dms
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@@ -977,106 +964,3 @@ def get_index_attempt_errors_for_cc_pair(
|
||||
stmt = stmt.offset(page * page_size).limit(page_size)
|
||||
|
||||
return list(db_session.scalars(stmt).all())
|
||||
|
||||
|
||||
# ── Metrics query helpers ──────────────────────────────────────────────
|
||||
|
||||
|
||||
class ActiveIndexAttemptMetric(NamedTuple):
    """One grouped row produced by get_active_index_attempts_for_metrics."""

    # Status shared by the attempts in this group.
    status: IndexingStatus
    # Source of the connector the attempts belong to.
    source: "DocumentSource"
    # ID of the connector-credential pair.
    cc_pair_id: int
    # Name of the connector-credential pair; may be absent.
    cc_pair_name: str | None
    # Number of attempts in this group.
    attempt_count: int
|
||||
|
||||
|
||||
def get_active_index_attempts_for_metrics(
    db_session: Session,
) -> list[ActiveIndexAttemptMetric]:
    """Count index attempts that are not in a terminal status.

    Attempts are grouped by (status, source, cc_pair_id, cc_pair_name) and
    each group is returned as an ActiveIndexAttemptMetric carrying the group
    size as ``attempt_count``.
    """
    from onyx.db.models import Connector

    finished_statuses = [state for state in IndexingStatus if state.is_terminal()]

    # The same columns are selected and grouped on.
    grouping_columns = (
        IndexAttempt.status,
        Connector.source,
        ConnectorCredentialPair.id,
        ConnectorCredentialPair.name,
    )
    grouped_query = (
        db_session.query(*grouping_columns, func.count())
        .join(
            ConnectorCredentialPair,
            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .filter(IndexAttempt.status.notin_(finished_statuses))
        .group_by(*grouping_columns)
    )
    return [ActiveIndexAttemptMetric(*record) for record in grouped_query.all()]
|
||||
|
||||
|
||||
def get_failed_attempt_counts_by_cc_pair(
    db_session: Session,
    since: datetime | None = None,
) -> dict[int, int]:
    """Map each cc_pair_id to its number of FAILED index attempts.

    Only attempts whose ``time_created`` is at or after ``since`` are
    counted. When ``since`` is omitted the window defaults to the last 90
    days, which keeps the aggregation from scanning unbounded history.
    """
    cutoff = since
    if cutoff is None:
        cutoff = datetime.now(timezone.utc) - timedelta(days=90)

    failed_query = (
        db_session.query(
            IndexAttempt.connector_credential_pair_id,
            func.count(),
        )
        .filter(IndexAttempt.status == IndexingStatus.FAILED)
        .filter(IndexAttempt.time_created >= cutoff)
        .group_by(IndexAttempt.connector_credential_pair_id)
    )
    counts_by_pair = {}
    for pair_id, failure_count in failed_query.all():
        counts_by_pair[pair_id] = failure_count
    return counts_by_pair
|
||||
|
||||
|
||||
def get_docs_indexed_by_cc_pair(
    db_session: Session,
    since: datetime | None = None,
) -> dict[int, int]:
    """Map each cc_pair_id to the total of new docs indexed by SUCCESS attempts.

    Restricting to SUCCESS keeps partial results of failed attempts out of
    the totals. Only attempts whose ``time_created`` is at or after ``since``
    are included; when ``since`` is omitted the window is the last 90 days.
    """
    cutoff = since
    if cutoff is None:
        cutoff = datetime.now(timezone.utc) - timedelta(days=90)

    # NULL new_docs_indexed values count as zero inside the sum.
    docs_sum = func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0))
    totals = (
        db_session.query(IndexAttempt.connector_credential_pair_id, docs_sum)
        .filter(IndexAttempt.status == IndexingStatus.SUCCESS)
        .filter(IndexAttempt.time_created >= cutoff)
        .group_by(IndexAttempt.connector_credential_pair_id)
        .all()
    )
    return {pair_id: int(doc_total or 0) for pair_id, doc_total in totals}
|
||||
|
||||
@@ -23,6 +23,7 @@ import openpyxl
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from PIL import Image
|
||||
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
from onyx.configs.constants import ONYX_METADATA_FILENAME
|
||||
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
@@ -191,6 +192,56 @@ def read_text_file(
|
||||
return file_content_raw, metadata
|
||||
|
||||
|
||||
def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
    """Count embedded images in a PDF, stopping as soon as the count passes ``cap``.

    The result is meant for rejecting PDFs whose image count would exhaust
    memory during indexing. Once the running count exceeds ``cap`` it is
    returned immediately (a value greater than ``cap`` acts as a sentinel),
    so the remaining images are never enumerated.

    Returns 0 when the PDF cannot be parsed, and also when it is encrypted
    and does not decrypt with an empty password. PDFs that do decrypt with an
    empty password are counted normally. Callers should run their own
    password-protection check first, since a locked PDF is reported as 0.

    The stream is rewound to the position it had on entry before returning,
    provided that position could be read.
    """
    from pypdf import PdfReader

    # Remember the caller's offset; None means the stream could not report one.
    try:
        original_offset = file.tell()
    except Exception:
        original_offset = None

    try:
        if original_offset is not None:
            file.seek(0)
        pdf = PdfReader(file)

        if pdf.is_encrypted:
            # Attempt the empty password; any failure means we cannot inspect it.
            try:
                unlocked = pdf.decrypt("") != 0
            except Exception:
                unlocked = False
            if not unlocked:
                return 0

        images_seen = 0
        for pdf_page in pdf.pages:
            for _ in pdf_page.images:
                images_seen += 1
                if images_seen > cap:
                    return images_seen
        return images_seen
    except Exception:
        logger.warning("Failed to count embedded images in PDF", exc_info=True)
        return 0
    finally:
        # Best-effort rewind; a failing seek must not mask the result.
        if original_offset is not None:
            try:
                file.seek(original_offset)
            except Exception:
                pass
|
||||
|
||||
|
||||
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
|
||||
"""
|
||||
Extract text from a PDF. For embedded images, a more complex approach is needed.
|
||||
@@ -254,8 +305,27 @@ def read_pdf_file(
|
||||
)
|
||||
|
||||
if extract_images:
|
||||
image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
images_processed = 0
|
||||
cap_reached = False
|
||||
for page_num, page in enumerate(pdf_reader.pages):
|
||||
if cap_reached:
|
||||
break
|
||||
for image_file_object in page.images:
|
||||
if images_processed >= image_cap:
|
||||
# Defense-in-depth backstop. Upload-time validation
|
||||
# should have rejected files exceeding the cap, but
|
||||
# we also break here so a single oversized file can
|
||||
# never pin a worker.
|
||||
logger.warning(
|
||||
"PDF embedded image cap reached (%d). "
|
||||
"Skipping remaining images on page %d and beyond.",
|
||||
image_cap,
|
||||
page_num + 1,
|
||||
)
|
||||
cap_reached = True
|
||||
break
|
||||
|
||||
image = Image.open(io.BytesIO(image_file_object.data))
|
||||
img_byte_arr = io.BytesIO()
|
||||
image.save(img_byte_arr, format=image.format)
|
||||
@@ -268,6 +338,7 @@ def read_pdf_file(
|
||||
image_callback(img_bytes, image_name)
|
||||
else:
|
||||
extracted_images.append((img_bytes, image_name))
|
||||
images_processed += 1
|
||||
|
||||
return text, metadata, extracted_images
|
||||
|
||||
|
||||
@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.permissions import require_permission
|
||||
from onyx.background.celery.versioned_apps.client import app as celery_app
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
@@ -51,6 +53,9 @@ from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import Permission
|
||||
from onyx.db.models import User
|
||||
from onyx.document_index.interfaces import DocumentMetadata
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
|
||||
@@ -128,6 +133,49 @@ class DeleteFileResponse(BaseModel):
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
|
||||
"""True if either the filename or the content-type indicates a PDF.
|
||||
|
||||
Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
|
||||
``Content-Type: application/octet-stream``), so we also fall back to
|
||||
extension-based detection via ``mimetypes.guess_type`` on the filename.
|
||||
"""
|
||||
if content_type == "application/pdf":
|
||||
return True
|
||||
guessed, _ = mimetypes.guess_type(filename)
|
||||
return guessed == "application/pdf"
|
||||
|
||||
|
||||
def _check_pdf_image_caps(
    filename: str, content: bytes, content_type: str | None, batch_total: int
) -> int:
    """Validate a PDF against the per-file and per-batch embedded-image limits.

    Non-PDF files are not inspected and contribute 0 images.

    Returns:
        The embedded-image count for this file, for the caller to add to its
        running batch total.

    Raises:
        OnyxError: With ``INVALID_INPUT`` when the file alone exceeds the
            per-file limit, or when adding it to ``batch_total`` would exceed
            the per-batch limit.
    """
    if not _looks_like_pdf(filename, content_type):
        return 0

    per_file_limit = MAX_EMBEDDED_IMAGES_PER_FILE
    per_batch_limit = MAX_EMBEDDED_IMAGES_PER_UPLOAD

    # Count up to the larger limit so the number is meaningful for both checks.
    counting_ceiling = max(per_file_limit, per_batch_limit)
    image_count = count_pdf_embedded_images(BytesIO(content), counting_ceiling)

    if image_count > per_file_limit:
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            f"PDF '{filename}' contains too many embedded images "
            f"(more than {per_file_limit}). Try splitting the document into smaller files.",
        )

    projected_batch_total = batch_total + image_count
    if projected_batch_total > per_batch_limit:
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            f"Upload would exceed the {per_batch_limit}-image limit across all "
            f"files in this batch. Try uploading fewer image-heavy files at once.",
        )

    return image_count
|
||||
|
||||
|
||||
def _sanitize_path(path: str) -> str:
|
||||
"""Sanitize a file path, removing traversal attempts and normalizing.
|
||||
|
||||
@@ -356,6 +404,7 @@ async def upload_files(
|
||||
|
||||
uploaded_entries: list[LibraryEntryResponse] = []
|
||||
total_size = 0
|
||||
batch_image_total = 0
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Sanitize the base path
|
||||
@@ -375,6 +424,14 @@ async def upload_files(
|
||||
detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB",
|
||||
)
|
||||
|
||||
# Reject PDFs with an unreasonable per-file or per-batch image count
|
||||
batch_image_total += _check_pdf_image_caps(
|
||||
filename=file.filename or "unnamed",
|
||||
content=content,
|
||||
content_type=file.content_type,
|
||||
batch_total=batch_image_total,
|
||||
)
|
||||
|
||||
# Validate cumulative storage (existing + this upload batch)
|
||||
total_size += file_size
|
||||
if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
|
||||
@@ -473,6 +530,7 @@ async def upload_zip(
|
||||
|
||||
uploaded_entries: list[LibraryEntryResponse] = []
|
||||
total_size = 0
|
||||
batch_image_total = 0
|
||||
|
||||
# Extract zip contents into a subfolder named after the zip file
|
||||
zip_name = api_sanitize_filename(file.filename or "upload")
|
||||
@@ -511,6 +569,36 @@ async def upload_zip(
|
||||
logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
|
||||
continue
|
||||
|
||||
# Skip PDFs that would trip the per-file or per-batch image
|
||||
# cap (would OOM the user-file-processing worker). Matches
|
||||
# /upload behavior but uses skip-and-warn to stay consistent
|
||||
# with the zip path's handling of oversized files.
|
||||
zip_file_name = zip_info.filename.split("/")[-1]
|
||||
zip_content_type, _ = mimetypes.guess_type(zip_file_name)
|
||||
if zip_content_type == "application/pdf":
|
||||
image_count = count_pdf_embedded_images(
|
||||
BytesIO(file_content),
|
||||
max(
|
||||
MAX_EMBEDDED_IMAGES_PER_FILE,
|
||||
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
|
||||
),
|
||||
)
|
||||
if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
|
||||
logger.warning(
|
||||
"Skipping '%s' - exceeds %d per-file embedded-image cap",
|
||||
zip_info.filename,
|
||||
MAX_EMBEDDED_IMAGES_PER_FILE,
|
||||
)
|
||||
continue
|
||||
if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
|
||||
logger.warning(
|
||||
"Skipping '%s' - would exceed %d per-batch embedded-image cap",
|
||||
zip_info.filename,
|
||||
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
|
||||
)
|
||||
continue
|
||||
batch_image_total += image_count
|
||||
|
||||
total_size += file_size
|
||||
|
||||
# Validate cumulative storage
|
||||
|
||||
@@ -9,7 +9,10 @@ from pydantic import ConfigDict
|
||||
from pydantic import Field
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
from onyx.db.llm import fetch_default_llm_model
|
||||
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
|
||||
from onyx.file_processing.extract_file_text import extract_file_text
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
@@ -190,6 +193,11 @@ def categorize_uploaded_files(
|
||||
token_threshold_k * 1000 if token_threshold_k else None
|
||||
) # 0 → None = no limit
|
||||
|
||||
# Running total of embedded images across PDFs in this batch. Once the
|
||||
# aggregate cap is reached, subsequent PDFs in the same upload are
|
||||
# rejected even if they'd individually fit under MAX_EMBEDDED_IMAGES_PER_FILE.
|
||||
batch_image_total = 0
|
||||
|
||||
for upload in files:
|
||||
try:
|
||||
filename = get_safe_filename(upload)
|
||||
@@ -252,6 +260,47 @@ def categorize_uploaded_files(
|
||||
)
|
||||
continue
|
||||
|
||||
# Reject PDFs with an unreasonable number of embedded images
|
||||
# (either per-file or accumulated across this upload batch).
|
||||
# A PDF with thousands of embedded images can OOM the
|
||||
# user-file-processing celery worker because every image is
|
||||
# decoded with PIL and then sent to the vision LLM.
|
||||
if extension == ".pdf":
|
||||
file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
# Use the larger of the two caps as the short-circuit
|
||||
# threshold so we get a useful count for both checks.
|
||||
# count_pdf_embedded_images restores the stream position.
|
||||
count = count_pdf_embedded_images(
|
||||
upload.file, max(file_cap, batch_cap)
|
||||
)
|
||||
if count > file_cap:
|
||||
results.rejected.append(
|
||||
RejectedFile(
|
||||
filename=filename,
|
||||
reason=(
|
||||
f"PDF contains too many embedded images "
|
||||
f"(more than {file_cap}). Try splitting "
|
||||
f"the document into smaller files."
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
if batch_image_total + count > batch_cap:
|
||||
results.rejected.append(
|
||||
RejectedFile(
|
||||
filename=filename,
|
||||
reason=(
|
||||
f"Upload would exceed the "
|
||||
f"{batch_cap}-image limit across all "
|
||||
f"files in this batch. Try uploading "
|
||||
f"fewer image-heavy files at once."
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
batch_image_total += count
|
||||
|
||||
text_content = extract_file_text(
|
||||
file=upload.file,
|
||||
file_name=filename,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""Generic Celery task lifecycle Prometheus metrics.
|
||||
|
||||
Provides signal handlers that track task started/completed/failed counts,
|
||||
active task gauge, task duration histograms, and retry/reject/revoke counts.
|
||||
active task gauge, task duration histograms, queue wait time histograms,
|
||||
and retry/reject/revoke counts.
|
||||
These fire for ALL tasks on the worker — no per-connector enrichment
|
||||
(see indexing_task_metrics.py for that).
|
||||
|
||||
@@ -71,6 +72,32 @@ TASK_REJECTED = Counter(
|
||||
["task_name"],
|
||||
)
|
||||
|
||||
# Queue wait time per task: the gap between the publisher stamping
# ``enqueued_at`` into the message headers and the worker starting execution.
# Labelled by task name and queue.
TASK_QUEUE_WAIT = Histogram(
    "onyx_celery_task_queue_wait_seconds",
    "Time a Celery task spent waiting in the queue before execution started",
    ["task_name", "queue"],
    # Bucket upper bounds in seconds, from 100 ms up to 10 days (864000 s).
    buckets=[
        0.1,
        0.5,
        1,
        5,
        30,
        60,
        300,
        600,
        1800,
        3600,
        7200,
        14400,
        28800,
        43200,
        86400,
        172800,
        432000,
        864000,
    ],
)
|
||||
|
||||
# task_id → (monotonic start time, metric labels)
|
||||
_task_start_times: dict[str, tuple[float, dict[str, str]]] = {}
|
||||
|
||||
@@ -133,6 +160,13 @@ def on_celery_task_prerun(
|
||||
with _task_start_times_lock:
|
||||
_evict_stale_start_times()
|
||||
_task_start_times[task_id] = (time.monotonic(), labels)
|
||||
|
||||
headers = getattr(task.request, "headers", None) or {}
|
||||
enqueued_at = headers.get("enqueued_at")
|
||||
if isinstance(enqueued_at, (int, float)):
|
||||
TASK_QUEUE_WAIT.labels(**labels).observe(
|
||||
max(0.0, time.time() - enqueued_at)
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to record celery task prerun metrics", exc_info=True)
|
||||
|
||||
|
||||
123
backend/onyx/server/metrics/connector_health_metrics.py
Normal file
123
backend/onyx/server/metrics/connector_health_metrics.py
Normal file
@@ -0,0 +1,123 @@
|
||||
"""Prometheus metrics for connector health and index attempts.
|
||||
|
||||
Emitted by docfetching and docprocessing workers when connector or
|
||||
index attempt state changes. All functions silently catch exceptions
|
||||
to avoid disrupting the caller's business logic.
|
||||
|
||||
Gauge metrics (error state, last success timestamp) are per-process.
|
||||
With multiple worker pods, use max() aggregation in PromQL to get the
|
||||
correct value across instances, e.g.:
|
||||
max by (cc_pair_id, connector_name) (onyx_connector_in_error_state)
|
||||
|
||||
Unlike the per-task counters in indexing_task_metrics.py, these metrics
|
||||
include connector_name because their cardinality is bounded by the number
|
||||
of connectors (one series per connector), not by the number of task
|
||||
executions.
|
||||
"""
|
||||
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Gauge
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Label names shared by every per-connector metric defined below.
_CONNECTOR_LABELS = ["tenant_id", "source", "cc_pair_id", "connector_name"]

# --- Index attempt lifecycle ---

# Counts status transitions; carries an extra "status" label on top of the
# shared connector labels.
INDEX_ATTEMPT_STATUS = Counter(
    "onyx_index_attempt_transitions_total",
    "Index attempt status transitions",
    [*_CONNECTOR_LABELS, "status"],
)

# --- Connector health ---

# 1.0 while the connector is flagged as being in a repeated error state,
# 0.0 otherwise.
CONNECTOR_IN_ERROR_STATE = Gauge(
    "onyx_connector_in_error_state",
    "Whether the connector is in a repeated error state (1=yes, 0=no)",
    _CONNECTOR_LABELS,
)

# Holds the timestamp value supplied by the caller on each successful run.
CONNECTOR_LAST_SUCCESS_TIMESTAMP = Gauge(
    "onyx_connector_last_success_timestamp_seconds",
    "Unix timestamp of last successful indexing for this connector",
    _CONNECTOR_LABELS,
)

# Monotonic running total of documents indexed.
CONNECTOR_DOCS_INDEXED = Counter(
    "onyx_connector_docs_indexed_total",
    "Total documents indexed per connector (monotonic)",
    _CONNECTOR_LABELS,
)

# Monotonic running total of failed index attempts.
CONNECTOR_INDEXING_ERRORS = Counter(
    "onyx_connector_indexing_errors_total",
    "Total failed index attempts per connector (monotonic)",
    _CONNECTOR_LABELS,
)
|
||||
|
||||
|
||||
def on_index_attempt_status_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    connector_name: str,
    status: str,
) -> None:
    """Record an index attempt status transition.

    Increments ``INDEX_ATTEMPT_STATUS`` for the given connector and status.
    When ``status`` is exactly ``"failed"``, ``CONNECTOR_INDEXING_ERRORS`` is
    incremented as well.

    Args:
        tenant_id: Tenant label value.
        source: Source label value.
        cc_pair_id: Connector/credential pair id; stringified for the label.
        connector_name: Connector name label value.
        status: Status being transitioned to; used as the ``status`` label.

    Best-effort: any exception is logged at debug level and suppressed so
    that metrics never disrupt the caller.
    """
    try:
        labels = {
            "tenant_id": tenant_id,
            "source": source,
            "cc_pair_id": str(cc_pair_id),
            "connector_name": connector_name,
        }
        INDEX_ATTEMPT_STATUS.labels(**labels, status=status).inc()
        if status == "failed":
            CONNECTOR_INDEXING_ERRORS.labels(**labels).inc()
    except Exception:
        logger.debug("Failed to record index attempt status metric", exc_info=True)
|
||||
|
||||
|
||||
def on_connector_error_state_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    connector_name: str,
    in_error: bool,
) -> None:
    """Record a change of a connector's repeated-error state.

    Sets ``CONNECTOR_IN_ERROR_STATE`` for the connector to ``1.0`` when
    ``in_error`` is truthy and ``0.0`` otherwise.

    Args:
        tenant_id: Tenant label value.
        source: Source label value.
        cc_pair_id: Connector/credential pair id; stringified for the label.
        connector_name: Connector name label value.
        in_error: Whether the connector is now in the repeated error state.

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        CONNECTOR_IN_ERROR_STATE.labels(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
            connector_name=connector_name,
        ).set(1.0 if in_error else 0.0)
    except Exception:
        logger.debug("Failed to record connector error state metric", exc_info=True)
|
||||
|
||||
|
||||
def on_connector_indexing_success(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    connector_name: str,
    docs_indexed: int,
    success_timestamp: float,
) -> None:
    """Record a successful indexing run for a connector.

    Sets ``CONNECTOR_LAST_SUCCESS_TIMESTAMP`` to ``success_timestamp`` and,
    when ``docs_indexed`` is positive, adds it to ``CONNECTOR_DOCS_INDEXED``.

    Args:
        tenant_id: Tenant label value.
        source: Source label value.
        cc_pair_id: Connector/credential pair id; stringified for the label.
        connector_name: Connector name label value.
        docs_indexed: Number of documents indexed by this run.
        success_timestamp: Value stored in the last-success gauge (the gauge
            is described as a Unix timestamp in seconds).

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        labels = {
            "tenant_id": tenant_id,
            "source": source,
            "cc_pair_id": str(cc_pair_id),
            "connector_name": connector_name,
        }
        CONNECTOR_LAST_SUCCESS_TIMESTAMP.labels(**labels).set(success_timestamp)
        # Zero or negative counts leave the docs counter untouched.
        if docs_indexed > 0:
            CONNECTOR_DOCS_INDEXED.labels(**labels).inc(docs_indexed)
    except Exception:
        logger.debug("Failed to record connector success metric", exc_info=True)
|
||||
104
backend/onyx/server/metrics/deletion_metrics.py
Normal file
104
backend/onyx/server/metrics/deletion_metrics.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""Connector-deletion-specific Prometheus metrics.
|
||||
|
||||
Tracks the deletion lifecycle:
|
||||
1. Deletions started (taskset generated)
|
||||
2. Deletions completed (success or failure)
|
||||
3. Taskset duration (from taskset generation to completion or failure).
|
||||
Note: this measures the most recent taskset execution, NOT wall-clock
|
||||
time since the user triggered the deletion. When deletion is blocked by
|
||||
indexing/pruning/permissions, the fence is cleared and a fresh taskset
|
||||
is generated on each retry, resetting this timer.
|
||||
4. Deletion blocked by dependencies (indexing, pruning, permissions, etc.)
|
||||
5. Fence resets (stuck deletion recovery)
|
||||
|
||||
All metrics are labeled by tenant_id. cc_pair_id is intentionally excluded
|
||||
to avoid unbounded cardinality.
|
||||
|
||||
Usage:
|
||||
from onyx.server.metrics.deletion_metrics import (
|
||||
inc_deletion_started,
|
||||
inc_deletion_completed,
|
||||
observe_deletion_taskset_duration,
|
||||
inc_deletion_blocked,
|
||||
inc_deletion_fence_reset,
|
||||
)
|
||||
"""
|
||||
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Deletions initiated, counted per tenant.
DELETION_STARTED = Counter(
    "onyx_deletion_started_total",
    "Connector deletions initiated (taskset generated)",
    ["tenant_id"],
)

# Deletions finished, split by the caller-supplied outcome label.
DELETION_COMPLETED = Counter(
    "onyx_deletion_completed_total",
    "Connector deletions completed",
    ["tenant_id", "outcome"],
)

# Taskset duration in seconds; bucket upper bounds run from 10 s to 6 h.
DELETION_TASKSET_DURATION = Histogram(
    "onyx_deletion_taskset_duration_seconds",
    "Duration of a connector deletion taskset, from taskset generation "
    "to completion or failure. Does not include time spent blocked on "
    "indexing/pruning/permissions before the taskset was generated.",
    ["tenant_id", "outcome"],
    buckets=[10, 30, 60, 120, 300, 600, 1800, 3600, 7200, 21600],
)

# Times a deletion was blocked, split by which dependency blocked it.
DELETION_BLOCKED = Counter(
    "onyx_deletion_blocked_total",
    "Times deletion was blocked by a dependency",
    ["tenant_id", "blocker"],
)

# Deletion fences reset, counted per tenant.
DELETION_FENCE_RESET = Counter(
    "onyx_deletion_fence_reset_total",
    "Deletion fences reset due to missing celery tasks",
    ["tenant_id"],
)
|
||||
|
||||
|
||||
def inc_deletion_started(tenant_id: str) -> None:
    """Increment ``DELETION_STARTED`` for ``tenant_id``.

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        DELETION_STARTED.labels(tenant_id=tenant_id).inc()
    except Exception:
        logger.debug("Failed to record deletion started", exc_info=True)
|
||||
|
||||
|
||||
def inc_deletion_completed(tenant_id: str, outcome: str) -> None:
    """Increment ``DELETION_COMPLETED`` for ``tenant_id`` and ``outcome``.

    ``outcome`` is used verbatim as the ``outcome`` label value.

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        DELETION_COMPLETED.labels(tenant_id=tenant_id, outcome=outcome).inc()
    except Exception:
        logger.debug("Failed to record deletion completed", exc_info=True)
|
||||
|
||||
|
||||
def observe_deletion_taskset_duration(
    tenant_id: str, outcome: str, duration_seconds: float
) -> None:
    """Observe one taskset duration in ``DELETION_TASKSET_DURATION``.

    Args:
        tenant_id: Tenant label value.
        outcome: Used verbatim as the ``outcome`` label value.
        duration_seconds: Duration to record, in seconds.

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        DELETION_TASKSET_DURATION.labels(tenant_id=tenant_id, outcome=outcome).observe(
            duration_seconds
        )
    except Exception:
        logger.debug("Failed to record deletion taskset duration", exc_info=True)
|
||||
|
||||
|
||||
def inc_deletion_blocked(tenant_id: str, blocker: str) -> None:
    """Increment ``DELETION_BLOCKED`` for ``tenant_id`` and ``blocker``.

    ``blocker`` is used verbatim as the ``blocker`` label value, naming the
    dependency that blocked the deletion.

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        DELETION_BLOCKED.labels(tenant_id=tenant_id, blocker=blocker).inc()
    except Exception:
        logger.debug("Failed to record deletion blocked", exc_info=True)
|
||||
|
||||
|
||||
def inc_deletion_fence_reset(tenant_id: str) -> None:
    """Increment ``DELETION_FENCE_RESET`` for ``tenant_id``.

    Best-effort: any exception is logged at debug level and suppressed.
    """
    try:
        DELETION_FENCE_RESET.labels(tenant_id=tenant_id).inc()
    except Exception:
        logger.debug("Failed to record deletion fence reset", exc_info=True)
|
||||
@@ -1,25 +1,30 @@
|
||||
"""Prometheus collectors for Celery queue depths and indexing pipeline state.
|
||||
"""Prometheus collectors for Celery queue depths and infrastructure health.
|
||||
|
||||
These collectors query Redis and Postgres at scrape time (the Collector pattern),
|
||||
These collectors query Redis at scrape time (the Collector pattern),
|
||||
so metrics are always fresh when Prometheus scrapes /metrics. They run inside the
|
||||
monitoring celery worker which already has Redis and DB access.
|
||||
monitoring celery worker which already has Redis access.
|
||||
|
||||
To avoid hammering Redis/Postgres on every 15s scrape, results are cached with
|
||||
To avoid hammering Redis on every 15s scrape, results are cached with
|
||||
a configurable TTL (default 30s). This means metrics may be up to TTL seconds
|
||||
stale, which is fine for monitoring dashboards.
|
||||
|
||||
Note: connector health and index attempt metrics are push-based (emitted by
|
||||
workers at state-change time) and live in connector_health_metrics.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
from prometheus_client.core import GaugeMetricFamily
|
||||
from prometheus_client.registry import Collector
|
||||
from redis import Redis
|
||||
|
||||
from onyx.background.celery.celery_redis import celery_get_broker_client
|
||||
from onyx.background.celery.celery_redis import celery_get_queue_length
|
||||
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
@@ -31,6 +36,11 @@ logger = setup_logger()
|
||||
# the previous result without re-querying Redis/Postgres.
|
||||
_DEFAULT_CACHE_TTL = 30.0
|
||||
|
||||
# Maximum time (seconds) a single _collect_fresh() call may take before
|
||||
# the collector gives up and returns stale/empty results. Prevents the
|
||||
# /metrics endpoint from hanging indefinitely when a DB or Redis query stalls.
|
||||
_DEFAULT_COLLECT_TIMEOUT = 120.0
|
||||
|
||||
_QUEUE_LABEL_MAP: dict[str, str] = {
|
||||
OnyxCeleryQueues.PRIMARY: "primary",
|
||||
OnyxCeleryQueues.DOCPROCESSING: "docprocessing",
|
||||
@@ -62,18 +72,32 @@ _UNACKED_QUEUES: list[str] = [
|
||||
|
||||
|
||||
class _CachedCollector(Collector):
|
||||
"""Base collector with TTL-based caching.
|
||||
"""Base collector with TTL-based caching and timeout protection.
|
||||
|
||||
Subclasses implement ``_collect_fresh()`` to query the actual data source.
|
||||
The base ``collect()`` returns cached results if the TTL hasn't expired,
|
||||
avoiding repeated queries when Prometheus scrapes frequently.
|
||||
|
||||
A per-collection timeout prevents a slow DB or Redis query from blocking
|
||||
the /metrics endpoint indefinitely. If _collect_fresh() exceeds the
|
||||
timeout, stale cached results are returned instead.
|
||||
"""
|
||||
|
||||
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
cache_ttl: float = _DEFAULT_CACHE_TTL,
|
||||
collect_timeout: float = _DEFAULT_COLLECT_TIMEOUT,
|
||||
) -> None:
|
||||
self._cache_ttl = cache_ttl
|
||||
self._collect_timeout = collect_timeout
|
||||
self._cached_result: list[GaugeMetricFamily] | None = None
|
||||
self._last_collect_time: float = 0.0
|
||||
self._lock = threading.Lock()
|
||||
self._executor = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=1,
|
||||
thread_name_prefix=type(self).__name__,
|
||||
)
|
||||
self._inflight: concurrent.futures.Future | None = None
|
||||
|
||||
def collect(self) -> list[GaugeMetricFamily]:
|
||||
with self._lock:
|
||||
@@ -84,12 +108,28 @@ class _CachedCollector(Collector):
|
||||
):
|
||||
return self._cached_result
|
||||
|
||||
# If a previous _collect_fresh() is still running, wait on it
|
||||
# rather than queuing another. This prevents unbounded task
|
||||
# accumulation in the executor during extended DB outages.
|
||||
if self._inflight is not None and not self._inflight.done():
|
||||
future = self._inflight
|
||||
else:
|
||||
future = self._executor.submit(self._collect_fresh)
|
||||
self._inflight = future
|
||||
|
||||
try:
|
||||
result = self._collect_fresh()
|
||||
result = future.result(timeout=self._collect_timeout)
|
||||
self._inflight = None
|
||||
self._cached_result = result
|
||||
self._last_collect_time = now
|
||||
return result
|
||||
except concurrent.futures.TimeoutError:
|
||||
logger.warning(
|
||||
f"{type(self).__name__}._collect_fresh() timed out after {self._collect_timeout}s, returning stale cache"
|
||||
)
|
||||
return self._cached_result if self._cached_result is not None else []
|
||||
except Exception:
|
||||
self._inflight = None
|
||||
logger.exception(f"Error in {type(self).__name__}.collect()")
|
||||
# Return stale cache on error rather than nothing — avoids
|
||||
# metrics disappearing during transient failures.
|
||||
@@ -117,8 +157,6 @@ class QueueDepthCollector(_CachedCollector):
|
||||
if self._celery_app is None:
|
||||
return []
|
||||
|
||||
from onyx.background.celery.celery_redis import celery_get_broker_client
|
||||
|
||||
redis_client = celery_get_broker_client(self._celery_app)
|
||||
|
||||
depth = GaugeMetricFamily(
|
||||
@@ -194,208 +232,6 @@ class QueueDepthCollector(_CachedCollector):
|
||||
return None
|
||||
|
||||
|
||||
class IndexAttemptCollector(_CachedCollector):
|
||||
"""Queries Postgres for index attempt state on each scrape."""
|
||||
|
||||
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
|
||||
super().__init__(cache_ttl)
|
||||
self._configured: bool = False
|
||||
self._terminal_statuses: list = []
|
||||
|
||||
def configure(self) -> None:
|
||||
"""Call once DB engine is initialized."""
|
||||
from onyx.db.enums import IndexingStatus
|
||||
|
||||
self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
|
||||
self._configured = True
|
||||
|
||||
def _collect_fresh(self) -> list[GaugeMetricFamily]:
|
||||
if not self._configured:
|
||||
return []
|
||||
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.engine.tenant_utils import get_all_tenant_ids
|
||||
from onyx.db.index_attempt import get_active_index_attempts_for_metrics
|
||||
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||
|
||||
attempts_gauge = GaugeMetricFamily(
|
||||
"onyx_index_attempts_active",
|
||||
"Number of non-terminal index attempts",
|
||||
labels=[
|
||||
"status",
|
||||
"source",
|
||||
"tenant_id",
|
||||
"connector_name",
|
||||
"cc_pair_id",
|
||||
],
|
||||
)
|
||||
|
||||
tenant_ids = get_all_tenant_ids()
|
||||
|
||||
for tid in tenant_ids:
|
||||
# Defensive guard — get_all_tenant_ids() should never yield None,
|
||||
# but we guard here for API stability in case the contract changes.
|
||||
if tid is None:
|
||||
continue
|
||||
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
|
||||
try:
|
||||
with get_session_with_current_tenant() as session:
|
||||
rows = get_active_index_attempts_for_metrics(session)
|
||||
|
||||
for status, source, cc_id, cc_name, count in rows:
|
||||
name_val = cc_name or f"cc_pair_{cc_id}"
|
||||
attempts_gauge.add_metric(
|
||||
[
|
||||
status.value,
|
||||
source.value,
|
||||
tid,
|
||||
name_val,
|
||||
str(cc_id),
|
||||
],
|
||||
count,
|
||||
)
|
||||
finally:
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
|
||||
|
||||
return [attempts_gauge]
|
||||
|
||||
|
||||
class ConnectorHealthCollector(_CachedCollector):
|
||||
"""Queries Postgres for connector health state on each scrape."""
|
||||
|
||||
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
|
||||
super().__init__(cache_ttl)
|
||||
self._configured: bool = False
|
||||
|
||||
def configure(self) -> None:
|
||||
"""Call once DB engine is initialized."""
|
||||
self._configured = True
|
||||
|
||||
def _collect_fresh(self) -> list[GaugeMetricFamily]:
|
||||
if not self._configured:
|
||||
return []
|
||||
|
||||
from onyx.db.connector_credential_pair import (
|
||||
get_connector_health_for_metrics,
|
||||
)
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.engine.tenant_utils import get_all_tenant_ids
|
||||
from onyx.db.index_attempt import get_docs_indexed_by_cc_pair
|
||||
from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair
|
||||
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||
|
||||
staleness_gauge = GaugeMetricFamily(
|
||||
"onyx_connector_last_success_age_seconds",
|
||||
"Seconds since last successful index for this connector",
|
||||
labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
|
||||
)
|
||||
error_state_gauge = GaugeMetricFamily(
|
||||
"onyx_connector_in_error_state",
|
||||
"Whether the connector is in a repeated error state (1=yes, 0=no)",
|
||||
labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
|
||||
)
|
||||
by_status_gauge = GaugeMetricFamily(
|
||||
"onyx_connectors_by_status",
|
||||
"Number of connectors grouped by status",
|
||||
labels=["tenant_id", "status"],
|
||||
)
|
||||
error_total_gauge = GaugeMetricFamily(
|
||||
"onyx_connectors_in_error_total",
|
||||
"Total number of connectors in repeated error state",
|
||||
labels=["tenant_id"],
|
||||
)
|
||||
per_connector_labels = [
|
||||
"tenant_id",
|
||||
"source",
|
||||
"cc_pair_id",
|
||||
"connector_name",
|
||||
]
|
||||
docs_success_gauge = GaugeMetricFamily(
|
||||
"onyx_connector_docs_indexed",
|
||||
"Total new documents indexed (90-day rolling sum) per connector",
|
||||
labels=per_connector_labels,
|
||||
)
|
||||
docs_error_gauge = GaugeMetricFamily(
|
||||
"onyx_connector_error_count",
|
||||
"Total number of failed index attempts per connector",
|
||||
labels=per_connector_labels,
|
||||
)
|
||||
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
tenant_ids = get_all_tenant_ids()
|
||||
|
||||
for tid in tenant_ids:
|
||||
# Defensive guard — get_all_tenant_ids() should never yield None,
|
||||
# but we guard here for API stability in case the contract changes.
|
||||
if tid is None:
|
||||
continue
|
||||
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
|
||||
try:
|
||||
with get_session_with_current_tenant() as session:
|
||||
pairs = get_connector_health_for_metrics(session)
|
||||
error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)
|
||||
docs_by_cc = get_docs_indexed_by_cc_pair(session)
|
||||
|
||||
status_counts: dict[str, int] = {}
|
||||
error_count = 0
|
||||
|
||||
for (
|
||||
cc_id,
|
||||
status,
|
||||
in_error,
|
||||
last_success,
|
||||
cc_name,
|
||||
source,
|
||||
) in pairs:
|
||||
cc_id_str = str(cc_id)
|
||||
source_val = source.value
|
||||
name_val = cc_name or f"cc_pair_{cc_id}"
|
||||
label_vals = [tid, source_val, cc_id_str, name_val]
|
||||
|
||||
if last_success is not None:
|
||||
# Both `now` and `last_success` are timezone-aware
|
||||
# (the DB column uses DateTime(timezone=True)),
|
||||
# so subtraction is safe.
|
||||
age = (now - last_success).total_seconds()
|
||||
staleness_gauge.add_metric(label_vals, age)
|
||||
|
||||
error_state_gauge.add_metric(
|
||||
label_vals,
|
||||
1.0 if in_error else 0.0,
|
||||
)
|
||||
if in_error:
|
||||
error_count += 1
|
||||
|
||||
docs_success_gauge.add_metric(
|
||||
label_vals,
|
||||
docs_by_cc.get(cc_id, 0),
|
||||
)
|
||||
|
||||
docs_error_gauge.add_metric(
|
||||
label_vals,
|
||||
error_counts_by_cc.get(cc_id, 0),
|
||||
)
|
||||
|
||||
status_val = status.value
|
||||
status_counts[status_val] = status_counts.get(status_val, 0) + 1
|
||||
|
||||
for status_val, count in status_counts.items():
|
||||
by_status_gauge.add_metric([tid, status_val], count)
|
||||
|
||||
error_total_gauge.add_metric([tid], error_count)
|
||||
finally:
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
|
||||
|
||||
return [
|
||||
staleness_gauge,
|
||||
error_state_gauge,
|
||||
by_status_gauge,
|
||||
error_total_gauge,
|
||||
docs_success_gauge,
|
||||
docs_error_gauge,
|
||||
]
|
||||
|
||||
|
||||
class RedisHealthCollector(_CachedCollector):
|
||||
"""Collects Redis server health metrics (memory, clients, etc.)."""
|
||||
|
||||
@@ -411,8 +247,6 @@ class RedisHealthCollector(_CachedCollector):
|
||||
if self._celery_app is None:
|
||||
return []
|
||||
|
||||
from onyx.background.celery.celery_redis import celery_get_broker_client
|
||||
|
||||
redis_client = celery_get_broker_client(self._celery_app)
|
||||
|
||||
memory_used = GaugeMetricFamily(
|
||||
@@ -495,7 +329,9 @@ class WorkerHeartbeatMonitor:
|
||||
},
|
||||
)
|
||||
recv.capture(
|
||||
limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True
|
||||
limit=None,
|
||||
timeout=self._HEARTBEAT_TIMEOUT_SECONDS,
|
||||
wakeup=True,
|
||||
)
|
||||
except Exception:
|
||||
if self._running:
|
||||
@@ -553,6 +389,15 @@ class WorkerHealthCollector(_CachedCollector):
|
||||
|
||||
Reads worker status from ``WorkerHeartbeatMonitor`` which listens
|
||||
to the Celery event stream via a single persistent connection.
|
||||
|
||||
TODO: every monitoring pod subscribes to the cluster-wide Celery event
|
||||
stream, so each replica reports health for *all* workers in the cluster,
|
||||
not just itself. Prometheus distinguishes the replicas via the ``instance``
|
||||
label, so this doesn't break scraping, but it means N monitoring replicas
|
||||
do N× the work and may emit slightly inconsistent snapshots of the same
|
||||
cluster. The proper fix is to have each worker expose its own health (or
|
||||
to elect a single monitoring replica as the reporter) rather than
|
||||
broadcasting the full cluster view from every monitoring pod.
|
||||
"""
|
||||
|
||||
def __init__(self, cache_ttl: float = 30.0) -> None:
|
||||
@@ -571,10 +416,16 @@ class WorkerHealthCollector(_CachedCollector):
|
||||
"onyx_celery_active_worker_count",
|
||||
"Number of active Celery workers with recent heartbeats",
|
||||
)
|
||||
# Celery hostnames are ``{worker_type}@{nodename}`` (see supervisord.conf).
|
||||
# Emitting only the worker_type as a label causes N replicas of the same
|
||||
# type to collapse into identical timeseries within a single scrape,
|
||||
# which Prometheus rejects as "duplicate sample for timestamp". Split
|
||||
# the pieces into separate labels so each replica is distinct; callers
|
||||
# can still ``sum by (worker_type)`` to recover the old aggregated view.
|
||||
worker_up = GaugeMetricFamily(
|
||||
"onyx_celery_worker_up",
|
||||
"Whether a specific Celery worker is alive (1=up, 0=down)",
|
||||
labels=["worker"],
|
||||
labels=["worker_type", "hostname"],
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -582,11 +433,15 @@ class WorkerHealthCollector(_CachedCollector):
|
||||
alive_count = sum(1 for alive in status.values() if alive)
|
||||
active_workers.add_metric([], alive_count)
|
||||
|
||||
for hostname in sorted(status):
|
||||
# Use short name (before @) for single-host deployments,
|
||||
# full hostname when multiple hosts share a worker type.
|
||||
label = hostname.split("@")[0]
|
||||
worker_up.add_metric([label], 1 if status[hostname] else 0)
|
||||
for full_hostname in sorted(status):
|
||||
worker_type, sep, host = full_hostname.partition("@")
|
||||
if not sep:
|
||||
# Hostname didn't contain "@" — fall back to using the
|
||||
# whole string as the hostname with an empty type.
|
||||
worker_type, host = "", full_hostname
|
||||
worker_up.add_metric(
|
||||
[worker_type, host], 1 if status[full_hostname] else 0
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to collect worker health metrics", exc_info=True)
|
||||
|
||||
|
||||
@@ -6,8 +6,6 @@ Called once by the monitoring celery worker after Redis and DB are ready.
|
||||
from celery import Celery
|
||||
from prometheus_client.registry import REGISTRY
|
||||
|
||||
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
|
||||
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
|
||||
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
|
||||
from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
|
||||
from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
|
||||
@@ -21,8 +19,6 @@ logger = setup_logger()
|
||||
# module level ensures they survive the lifetime of the worker process and are
|
||||
# only registered with the Prometheus registry once.
|
||||
_queue_collector = QueueDepthCollector()
|
||||
_attempt_collector = IndexAttemptCollector()
|
||||
_connector_collector = ConnectorHealthCollector()
|
||||
_redis_health_collector = RedisHealthCollector()
|
||||
_worker_health_collector = WorkerHealthCollector()
|
||||
_heartbeat_monitor: WorkerHeartbeatMonitor | None = None
|
||||
@@ -34,6 +30,9 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
|
||||
Args:
|
||||
celery_app: The Celery application instance. Used to obtain a
|
||||
broker Redis client on each scrape for queue depth metrics.
|
||||
|
||||
Note: connector health and index attempt metrics are push-based
|
||||
(see connector_health_metrics.py) and do not use collectors.
|
||||
"""
|
||||
_queue_collector.set_celery_app(celery_app)
|
||||
_redis_health_collector.set_celery_app(celery_app)
|
||||
@@ -47,13 +46,8 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
|
||||
_heartbeat_monitor.start()
|
||||
_worker_health_collector.set_monitor(_heartbeat_monitor)
|
||||
|
||||
_attempt_collector.configure()
|
||||
_connector_collector.configure()
|
||||
|
||||
for collector in (
|
||||
_queue_collector,
|
||||
_attempt_collector,
|
||||
_connector_collector,
|
||||
_redis_health_collector,
|
||||
_worker_health_collector,
|
||||
):
|
||||
|
||||
@@ -27,6 +27,8 @@ _DEFAULT_PORTS: dict[str, int] = {
|
||||
"docfetching": 9092,
|
||||
"docprocessing": 9093,
|
||||
"heavy": 9094,
|
||||
"light": 9095,
|
||||
"primary": 9097,
|
||||
}
|
||||
|
||||
_server_started = False
|
||||
|
||||
@@ -28,14 +28,14 @@ PRUNING_ENUMERATION_DURATION = Histogram(
|
||||
"onyx_pruning_enumeration_duration_seconds",
|
||||
"Duration of document ID enumeration from the source connector during pruning",
|
||||
["connector_type"],
|
||||
buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
|
||||
buckets=[5, 60, 600, 1800, 3600, 10800, 21600],
|
||||
)
|
||||
|
||||
PRUNING_DIFF_DURATION = Histogram(
|
||||
"onyx_pruning_diff_duration_seconds",
|
||||
"Duration of diff computation and subtask dispatch during pruning",
|
||||
["connector_type"],
|
||||
buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
|
||||
buckets=[0.1, 0.25, 0.5, 1, 2, 5, 15, 30, 60],
|
||||
)
|
||||
|
||||
PRUNING_RATE_LIMIT_ERRORS = Counter(
|
||||
|
||||
@@ -214,7 +214,9 @@ distro==1.9.0
|
||||
dnspython==2.8.0
|
||||
# via email-validator
|
||||
docstring-parser==0.17.0
|
||||
# via cyclopts
|
||||
# via
|
||||
# cyclopts
|
||||
# google-cloud-aiplatform
|
||||
docutils==0.22.3
|
||||
# via rich-rst
|
||||
dropbox==12.0.2
|
||||
@@ -270,7 +272,13 @@ gitdb==4.0.12
|
||||
gitpython==3.1.45
|
||||
# via braintrust
|
||||
google-api-core==2.28.1
|
||||
# via google-api-python-client
|
||||
# via
|
||||
# google-api-python-client
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-api-python-client==2.86.0
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
@@ -278,21 +286,61 @@ google-auth==2.48.0
|
||||
# google-api-python-client
|
||||
# google-auth-httplib2
|
||||
# google-auth-oauthlib
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-auth-httplib2==0.1.0
|
||||
# via google-api-python-client
|
||||
google-auth-oauthlib==1.0.0
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
greenlet==3.2.4
|
||||
# via
|
||||
# playwright
|
||||
# sqlalchemy
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -562,6 +610,8 @@ packaging==24.2
|
||||
# dask
|
||||
# distributed
|
||||
# fastmcp
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
# jira
|
||||
# kombu
|
||||
@@ -608,12 +658,19 @@ propcache==0.4.1
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via google-api-core
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# ddtrace
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# onnxruntime
|
||||
# opentelemetry-proto
|
||||
# proto-plus
|
||||
@@ -646,6 +703,7 @@ pydantic==2.11.7
|
||||
# exa-py
|
||||
# fastapi
|
||||
# fastmcp
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# langchain-core
|
||||
# langfuse
|
||||
@@ -702,6 +760,7 @@ python-dateutil==2.8.2
|
||||
# botocore
|
||||
# celery
|
||||
# dateparser
|
||||
# google-cloud-bigquery
|
||||
# htmldate
|
||||
# hubspot-api-client
|
||||
# kubernetes
|
||||
@@ -780,6 +839,8 @@ requests==2.33.0
|
||||
# dropbox
|
||||
# exa-py
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# hubspot-api-client
|
||||
# huggingface-hub
|
||||
@@ -951,7 +1012,9 @@ typing-extensions==4.15.0
|
||||
# exa-py
|
||||
# exceptiongroup
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# jira
|
||||
# langchain-core
|
||||
|
||||
@@ -113,6 +113,8 @@ distlib==0.4.0
|
||||
# via virtualenv
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
docstring-parser==0.17.0
|
||||
# via google-cloud-aiplatform
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
execnet==2.1.2
|
||||
@@ -140,14 +142,65 @@ frozenlist==1.8.0
|
||||
# aiosignal
|
||||
fsspec==2025.10.0
|
||||
# via huggingface-hub
|
||||
google-api-core==2.28.1
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
greenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
|
||||
# via sqlalchemy
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -264,6 +317,8 @@ openapi-generator-cli==7.17.0
|
||||
packaging==24.2
|
||||
# via
|
||||
# black
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# hatchling
|
||||
# huggingface-hub
|
||||
# ipykernel
|
||||
@@ -304,6 +359,20 @@ propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# proto-plus
|
||||
psutil==7.1.3
|
||||
# via ipykernel
|
||||
ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
|
||||
@@ -325,6 +394,7 @@ pydantic==2.11.7
|
||||
# agent-client-protocol
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# litellm
|
||||
# mcp
|
||||
@@ -359,6 +429,7 @@ python-dateutil==2.8.2
|
||||
# via
|
||||
# aiobotocore
|
||||
# botocore
|
||||
# google-cloud-bigquery
|
||||
# jupyter-client
|
||||
# kubernetes
|
||||
# matplotlib
|
||||
@@ -391,6 +462,9 @@ reorder-python-imports-black==3.14.0
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# huggingface-hub
|
||||
# kubernetes
|
||||
@@ -485,7 +559,9 @@ typing-extensions==4.15.0
|
||||
# celery-types
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# ipython
|
||||
# mcp
|
||||
|
||||
@@ -86,6 +86,8 @@ discord-py==2.4.0
|
||||
# via onyx
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
docstring-parser==0.17.0
|
||||
# via google-cloud-aiplatform
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
fastapi==0.133.1
|
||||
@@ -102,12 +104,63 @@ frozenlist==1.8.0
|
||||
# aiosignal
|
||||
fsspec==2025.10.0
|
||||
# via huggingface-hub
|
||||
google-api-core==2.28.1
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -178,7 +231,10 @@ openai==2.14.0
|
||||
# litellm
|
||||
# onyx
|
||||
packaging==24.2
|
||||
# via huggingface-hub
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
posthog==3.7.4
|
||||
@@ -192,6 +248,20 @@ propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# proto-plus
|
||||
py==1.11.0
|
||||
# via retry
|
||||
pyasn1==0.6.3
|
||||
@@ -207,6 +277,7 @@ pydantic==2.11.7
|
||||
# agent-client-protocol
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# litellm
|
||||
# mcp
|
||||
@@ -223,6 +294,7 @@ python-dateutil==2.8.2
|
||||
# via
|
||||
# aiobotocore
|
||||
# botocore
|
||||
# google-cloud-bigquery
|
||||
# kubernetes
|
||||
# posthog
|
||||
python-dotenv==1.1.1
|
||||
@@ -246,6 +318,9 @@ regex==2025.11.3
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# huggingface-hub
|
||||
# kubernetes
|
||||
@@ -305,7 +380,9 @@ typing-extensions==4.15.0
|
||||
# anyio
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# mcp
|
||||
# openai
|
||||
|
||||
@@ -101,6 +101,8 @@ discord-py==2.4.0
|
||||
# via onyx
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
docstring-parser==0.17.0
|
||||
# via google-cloud-aiplatform
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
einops==0.8.1
|
||||
@@ -125,12 +127,63 @@ fsspec==2025.10.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
google-api-core==2.28.1
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -259,6 +312,8 @@ openai==2.14.0
|
||||
packaging==24.2
|
||||
# via
|
||||
# accelerate
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
# kombu
|
||||
# transformers
|
||||
@@ -278,6 +333,20 @@ propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# proto-plus
|
||||
psutil==7.1.3
|
||||
# via accelerate
|
||||
py==1.11.0
|
||||
@@ -295,6 +364,7 @@ pydantic==2.11.7
|
||||
# agent-client-protocol
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# litellm
|
||||
# mcp
|
||||
@@ -312,6 +382,7 @@ python-dateutil==2.8.2
|
||||
# aiobotocore
|
||||
# botocore
|
||||
# celery
|
||||
# google-cloud-bigquery
|
||||
# kubernetes
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
@@ -338,6 +409,9 @@ regex==2025.11.3
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# huggingface-hub
|
||||
# kubernetes
|
||||
@@ -425,7 +499,9 @@ typing-extensions==4.15.0
|
||||
# anyio
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# mcp
|
||||
# openai
|
||||
|
||||
@@ -7,7 +7,6 @@ import pytest
|
||||
|
||||
from onyx.connectors.gong.connector import GongConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -32,18 +31,20 @@ def test_gong_basic(
|
||||
mock_get_api_key: MagicMock, # noqa: ARG001
|
||||
gong_connector: GongConnector,
|
||||
) -> None:
|
||||
doc_batch_generator = gong_connector.poll_source(0, time.time())
|
||||
|
||||
doc_batch = next(doc_batch_generator)
|
||||
with pytest.raises(StopIteration):
|
||||
next(doc_batch_generator)
|
||||
|
||||
assert len(doc_batch) == 2
|
||||
checkpoint = gong_connector.build_dummy_checkpoint()
|
||||
|
||||
docs: list[Document] = []
|
||||
for doc in doc_batch:
|
||||
if not isinstance(doc, HierarchyNode):
|
||||
docs.append(doc)
|
||||
while checkpoint.has_more:
|
||||
generator = gong_connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
try:
|
||||
while True:
|
||||
item = next(generator)
|
||||
if isinstance(item, Document):
|
||||
docs.append(item)
|
||||
except StopIteration as e:
|
||||
checkpoint = e.value
|
||||
|
||||
assert len(docs) == 2
|
||||
|
||||
assert docs[0].semantic_identifier == "test with chris"
|
||||
assert docs[1].semantic_identifier == "Testing Gong"
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
|
||||
|
||||
def test_time_str_to_utc() -> None:
    """Verify ``time_str_to_utc`` maps each sample string to the expected UTC time.

    The samples cover a ``GMT`` zone name, numeric offsets with and without a
    trailing parenthesised zone comment, a string with no weekday, and a
    string that still carries a leading ``Date:`` prefix.
    """
    utc = datetime.timezone.utc
    expected_by_input = {
        "Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
            2021, 10, 5, 9, 38, 25, tzinfo=utc
        ),
        "Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
            2021, 7, 24, 9, 21, 20, tzinfo=utc
        ),
        "Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
            2021, 7, 29, 8, 20, 37, tzinfo=utc
        ),
        "30 Jun 2023 18:45:01 +0300": datetime.datetime(
            2023, 6, 30, 15, 45, 1, tzinfo=utc
        ),
        "22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
            2020, 3, 22, 20, 12, 18, tzinfo=utc
        ),
        "Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
            2025, 8, 27, 9, 40, 0, tzinfo=utc
        ),
    }
    for raw, expected in expected_by_input.items():
        actual = time_str_to_utc(raw)
        # Name the offending input so a failure in the loop is self-explanatory.
        assert actual == expected, f"{raw!r} parsed to {actual!r}, want {expected!r}"
|
||||
|
||||
|
||||
def test_time_str_to_utc_recovers_from_concatenated_headers() -> None:
    """Verify a date is still recovered when another header is fused onto it.

    Each input is a date string with the start of a following header
    (``To:`` / ``Cc:``) glued directly onto the timezone offset.
    """
    utc = datetime.timezone.utc
    # TZ is dropped during recovery, so the expected result is UTC rather
    # than the original offset.
    expected_by_input = {
        'Sat, 3 Nov 2007 14:33:28 -0200To: "jason" <jason@example.net>': (
            datetime.datetime(2007, 11, 3, 14, 33, 28, tzinfo=utc)
        ),
        "Fri, 20 Feb 2015 10:30:00 +0500Cc: someone@example.com": (
            datetime.datetime(2015, 2, 20, 10, 30, 0, tzinfo=utc)
        ),
    }
    for raw, expected in expected_by_input.items():
        actual = time_str_to_utc(raw)
        assert actual == expected, f"{raw!r} parsed to {actual!r}, want {expected!r}"
|
||||
|
||||
|
||||
def test_time_str_to_utc_raises_on_impossible_dates() -> None:
    """Verify ``time_str_to_utc`` raises ``ValueError`` for unusable input.

    The inputs are a day-of-month of 33, an hour of 31, free text that is not
    a date, and the empty string.
    """
    for bad in (
        "Wed, 33 Sep 2007 13:42:59 +0100",
        "Thu, 11 Oct 2007 31:50:55 +0900",
        "not a date at all",
        "",
    ):
        try:
            result = time_str_to_utc(bad)
        except ValueError:
            continue
        # Report which input slipped through; a bare ``pytest.raises`` inside
        # a loop would only say "DID NOT RAISE".
        pytest.fail(f"time_str_to_utc({bad!r}) returned {result!r}, want ValueError")
|
||||
@@ -1,3 +1,4 @@
|
||||
import copy
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
@@ -8,7 +9,6 @@ from unittest.mock import patch
|
||||
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.connectors.gmail.connector import _build_time_range_query
|
||||
from onyx.connectors.gmail.connector import GmailCheckpoint
|
||||
from onyx.connectors.gmail.connector import GmailConnector
|
||||
@@ -51,29 +51,43 @@ def test_build_time_range_query() -> None:
|
||||
assert query is None
|
||||
|
||||
|
||||
def test_time_str_to_utc() -> None:
|
||||
str_to_dt = {
|
||||
"Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
|
||||
2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
|
||||
2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
|
||||
2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"30 Jun 2023 18:45:01 +0300": datetime.datetime(
|
||||
2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
|
||||
2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
|
||||
2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
}
|
||||
for strptime, expected_datetime in str_to_dt.items():
|
||||
assert time_str_to_utc(strptime) == expected_datetime
|
||||
def _thread_with_date(date_header: str | None) -> dict[str, Any]:
|
||||
"""Load the fixture thread and replace (or strip, if None) its Date header."""
|
||||
json_path = os.path.join(os.path.dirname(__file__), "thread.json")
|
||||
with open(json_path, "r") as f:
|
||||
thread = cast(dict[str, Any], json.load(f))
|
||||
thread = copy.deepcopy(thread)
|
||||
|
||||
for message in thread["messages"]:
|
||||
headers: list[dict[str, str]] = message["payload"]["headers"]
|
||||
if date_header is None:
|
||||
message["payload"]["headers"] = [
|
||||
h for h in headers if h.get("name") != "Date"
|
||||
]
|
||||
continue
|
||||
|
||||
replaced = False
|
||||
for header in headers:
|
||||
if header.get("name") == "Date":
|
||||
header["value"] = date_header
|
||||
replaced = True
|
||||
break
|
||||
if not replaced:
|
||||
headers.append({"name": "Date", "value": date_header})
|
||||
|
||||
return thread
|
||||
|
||||
|
||||
def test_thread_to_document_skips_unparseable_dates() -> None:
    """Verify a thread with a bad Date header still converts to a Document.

    For each unparseable date the conversion must still return a ``Document``
    with the fixture's id and with ``doc_updated_at`` left as ``None``.
    """
    for bad_date in (
        "Wed, 33 Sep 2007 13:42:59 +0100",
        "Thu, 11 Oct 2007 31:50:55 +0900",
        "total garbage not even close to a date",
    ):
        doc = thread_to_document(_thread_with_date(bad_date), "admin@example.com")
        assert isinstance(doc, Document), f"failed for {bad_date!r}"
        # Name the input on every check so a loop failure is attributable.
        assert doc.doc_updated_at is None, f"unexpected timestamp for {bad_date!r}"
        assert doc.id == "192edefb315737c3", f"unexpected id for {bad_date!r}"
|
||||
|
||||
|
||||
def test_gmail_checkpoint_progression() -> None:
|
||||
|
||||
0
backend/tests/unit/onyx/connectors/gong/__init__.py
Normal file
0
backend/tests/unit/onyx/connectors/gong/__init__.py
Normal file
483
backend/tests/unit/onyx/connectors/gong/test_gong_checkpoint.py
Normal file
483
backend/tests/unit/onyx/connectors/gong/test_gong_checkpoint.py
Normal file
@@ -0,0 +1,483 @@
|
||||
import time
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.connectors.gong.connector import GongConnector
|
||||
from onyx.connectors.gong.connector import GongConnectorCheckpoint
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import Document
|
||||
|
||||
|
||||
def _make_transcript(call_id: str) -> dict[str, Any]:
    """Build a minimal transcript payload for one call.

    Args:
        call_id: Value stored under the ``callId`` key.

    Returns:
        A dict with ``callId`` and a single-entry ``transcript`` list holding
        one sentence spoken by ``speaker1``.
    """
    return {
        "callId": call_id,
        "transcript": [
            {
                "speakerId": "speaker1",
                "sentences": [{"text": "Hello world"}],
            }
        ],
    }
|
||||
|
||||
|
||||
def _make_call_detail(call_id: str, title: str) -> dict[str, Any]:
    """Build a minimal call-details payload for one call.

    Args:
        call_id: Stored as ``metaData.id`` and embedded in ``metaData.url``.
        title: Stored as ``metaData.title``.

    Returns:
        A dict with a ``metaData`` section and a single-entry ``parties`` list
        whose ``speakerId`` is ``speaker1``.
    """
    return {
        "metaData": {
            "id": call_id,
            "started": "2026-01-15T10:00:00Z",
            "title": title,
            "purpose": "Test call",
            "url": f"https://app.gong.io/call?id={call_id}",
            "system": "test-system",
        },
        "parties": [
            {
                "speakerId": "speaker1",
                "name": "Alice",
                "emailAddress": "alice@test.com",
            }
        ],
    }
|
||||
|
||||
|
||||
@pytest.fixture
def connector() -> GongConnector:
    """Provide a ``GongConnector`` loaded with placeholder credentials."""
    # Use a distinct local name so it does not shadow the fixture itself.
    gong_connector = GongConnector()
    gong_connector.load_credentials(
        {
            "gong_access_key": "test-key",
            "gong_access_key_secret": "test-secret",
        }
    )
    return gong_connector
|
||||
|
||||
|
||||
class TestGongConnectorCheckpoint:
|
||||
def test_build_dummy_checkpoint(self, connector: GongConnector) -> None:
|
||||
checkpoint = connector.build_dummy_checkpoint()
|
||||
assert checkpoint.has_more is True
|
||||
assert checkpoint.workspace_ids is None
|
||||
assert checkpoint.workspace_index == 0
|
||||
assert checkpoint.cursor is None
|
||||
|
||||
def test_validate_checkpoint_json(self, connector: GongConnector) -> None:
|
||||
original = GongConnectorCheckpoint(
|
||||
has_more=True,
|
||||
workspace_ids=["ws1", None],
|
||||
workspace_index=1,
|
||||
cursor="abc123",
|
||||
)
|
||||
json_str = original.model_dump_json()
|
||||
restored = connector.validate_checkpoint_json(json_str)
|
||||
assert restored == original
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
|
||||
def test_first_call_resolves_workspaces(
|
||||
self,
|
||||
mock_request: MagicMock,
|
||||
connector: GongConnector,
|
||||
) -> None:
|
||||
"""First checkpoint call should resolve workspaces and return without fetching."""
|
||||
# No workspaces configured — should resolve to [None]
|
||||
checkpoint = connector.build_dummy_checkpoint()
|
||||
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
|
||||
# Should return immediately (no yields)
|
||||
with pytest.raises(StopIteration) as exc_info:
|
||||
next(generator)
|
||||
|
||||
new_checkpoint = exc_info.value.value
|
||||
assert new_checkpoint.workspace_ids == [None]
|
||||
assert new_checkpoint.has_more is True
|
||||
assert new_checkpoint.workspace_index == 0
|
||||
|
||||
# No API calls should have been made for workspace resolution
|
||||
# when no workspaces are configured
|
||||
mock_request.assert_not_called()
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
|
||||
def test_single_page_no_cursor(
|
||||
self,
|
||||
mock_request: MagicMock,
|
||||
connector: GongConnector,
|
||||
) -> None:
|
||||
"""Single page of transcripts with no pagination cursor."""
|
||||
transcript_response = MagicMock()
|
||||
transcript_response.status_code = 200
|
||||
transcript_response.json.return_value = {
|
||||
"callTranscripts": [_make_transcript("call1")],
|
||||
"records": {},
|
||||
}
|
||||
|
||||
details_response = MagicMock()
|
||||
details_response.status_code = 200
|
||||
details_response.json.return_value = {
|
||||
"calls": [_make_call_detail("call1", "Test Call")]
|
||||
}
|
||||
|
||||
mock_request.side_effect = [transcript_response, details_response]
|
||||
|
||||
# Start from a checkpoint that already has workspaces resolved
|
||||
checkpoint = GongConnectorCheckpoint(
|
||||
has_more=True,
|
||||
workspace_ids=[None],
|
||||
workspace_index=0,
|
||||
)
|
||||
|
||||
docs: list[Document] = []
|
||||
failures: list[ConnectorFailure] = []
|
||||
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
try:
|
||||
while True:
|
||||
item = next(generator)
|
||||
if isinstance(item, Document):
|
||||
docs.append(item)
|
||||
elif isinstance(item, ConnectorFailure):
|
||||
failures.append(item)
|
||||
except StopIteration as e:
|
||||
checkpoint = e.value
|
||||
|
||||
assert len(docs) == 1
|
||||
assert docs[0].semantic_identifier == "Test Call"
|
||||
assert len(failures) == 0
|
||||
assert checkpoint.has_more is False
|
||||
assert checkpoint.workspace_index == 1
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
|
||||
def test_multi_page_with_cursor(
|
||||
self,
|
||||
mock_request: MagicMock,
|
||||
connector: GongConnector,
|
||||
) -> None:
|
||||
"""Two pages of transcripts — cursor advances between checkpoint calls."""
|
||||
# Page 1: returns cursor
|
||||
page1_response = MagicMock()
|
||||
page1_response.status_code = 200
|
||||
page1_response.json.return_value = {
|
||||
"callTranscripts": [_make_transcript("call1")],
|
||||
"records": {"cursor": "page2cursor"},
|
||||
}
|
||||
|
||||
details1_response = MagicMock()
|
||||
details1_response.status_code = 200
|
||||
details1_response.json.return_value = {
|
||||
"calls": [_make_call_detail("call1", "Call One")]
|
||||
}
|
||||
|
||||
# Page 2: no cursor (done)
|
||||
page2_response = MagicMock()
|
||||
page2_response.status_code = 200
|
||||
page2_response.json.return_value = {
|
||||
"callTranscripts": [_make_transcript("call2")],
|
||||
"records": {},
|
||||
}
|
||||
|
||||
details2_response = MagicMock()
|
||||
details2_response.status_code = 200
|
||||
details2_response.json.return_value = {
|
||||
"calls": [_make_call_detail("call2", "Call Two")]
|
||||
}
|
||||
|
||||
mock_request.side_effect = [
|
||||
page1_response,
|
||||
details1_response,
|
||||
page2_response,
|
||||
details2_response,
|
||||
]
|
||||
|
||||
checkpoint = GongConnectorCheckpoint(
|
||||
has_more=True,
|
||||
workspace_ids=[None],
|
||||
workspace_index=0,
|
||||
)
|
||||
|
||||
all_docs: list[Document] = []
|
||||
|
||||
# First checkpoint call — page 1
|
||||
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
try:
|
||||
while True:
|
||||
item = next(generator)
|
||||
if isinstance(item, Document):
|
||||
all_docs.append(item)
|
||||
except StopIteration as e:
|
||||
checkpoint = e.value
|
||||
|
||||
assert len(all_docs) == 1
|
||||
assert checkpoint.cursor == "page2cursor"
|
||||
assert checkpoint.has_more is True
|
||||
|
||||
# Second checkpoint call — page 2
|
||||
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
try:
|
||||
while True:
|
||||
item = next(generator)
|
||||
if isinstance(item, Document):
|
||||
all_docs.append(item)
|
||||
except StopIteration as e:
|
||||
checkpoint = e.value
|
||||
|
||||
assert len(all_docs) == 2
|
||||
assert all_docs[0].semantic_identifier == "Call One"
|
||||
assert all_docs[1].semantic_identifier == "Call Two"
|
||||
assert checkpoint.has_more is False
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
|
||||
def test_missing_call_details_yields_failure(
|
||||
self,
|
||||
mock_request: MagicMock,
|
||||
connector: GongConnector,
|
||||
) -> None:
|
||||
"""When call details are missing after retries, yield ConnectorFailure."""
|
||||
transcript_response = MagicMock()
|
||||
transcript_response.status_code = 200
|
||||
transcript_response.json.return_value = {
|
||||
"callTranscripts": [_make_transcript("call1")],
|
||||
"records": {},
|
||||
}
|
||||
|
||||
# Return empty call details every time (simulating the race condition)
|
||||
empty_details = MagicMock()
|
||||
empty_details.status_code = 200
|
||||
empty_details.json.return_value = {"calls": []}
|
||||
|
||||
mock_request.side_effect = [transcript_response] + [
|
||||
empty_details
|
||||
] * GongConnector.MAX_CALL_DETAILS_ATTEMPTS
|
||||
|
||||
checkpoint = GongConnectorCheckpoint(
|
||||
has_more=True,
|
||||
workspace_ids=[None],
|
||||
workspace_index=0,
|
||||
)
|
||||
|
||||
failures: list[ConnectorFailure] = []
|
||||
docs: list[Document] = []
|
||||
|
||||
with patch("onyx.connectors.gong.connector.time.sleep"):
|
||||
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
|
||||
try:
|
||||
while True:
|
||||
item = next(generator)
|
||||
if isinstance(item, ConnectorFailure):
|
||||
failures.append(item)
|
||||
elif isinstance(item, Document):
|
||||
docs.append(item)
|
||||
except StopIteration as e:
|
||||
checkpoint = e.value
|
||||
|
||||
assert len(docs) == 0
|
||||
assert len(failures) == 1
|
||||
assert failures[0].failed_document is not None
|
||||
assert failures[0].failed_document.document_id == "call1"
|
||||
assert checkpoint.has_more is False
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
def test_multi_workspace_iteration(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """Successive checkpoint runs walk through the workspaces one at a time.

    Two workspaces are configured, each answering with one transcript and
    one matching call-details payload. The first run should advance the
    checkpoint to workspace index 1 with more work pending; the second run
    should finish at index 2 with ``has_more`` False.
    """

    def _ok_response(payload: dict[str, object]) -> MagicMock:
        """Build a 200 response whose ``json()`` returns ``payload``."""
        response = MagicMock(status_code=200)
        response.json.return_value = payload
        return response

    # Requests are consumed in order: transcript then details, per workspace.
    mock_request.side_effect = [
        # Workspace 1: one call
        _ok_response(
            {"callTranscripts": [_make_transcript("call_ws1")], "records": {}}
        ),
        _ok_response({"calls": [_make_call_detail("call_ws1", "WS1 Call")]}),
        # Workspace 2: one call
        _ok_response(
            {"callTranscripts": [_make_transcript("call_ws2")], "records": {}}
        ),
        _ok_response({"calls": [_make_call_detail("call_ws2", "WS2 Call")]}),
    ]

    all_docs: list[Document] = []

    def _run_once(current: GongConnectorCheckpoint) -> GongConnectorCheckpoint:
        """Drain one checkpoint run, collecting documents; return the new checkpoint."""
        run = connector.load_from_checkpoint(0, time.time(), current)
        while True:
            try:
                item = next(run)
            except StopIteration as finished:
                return finished.value
            if isinstance(item, Document):
                all_docs.append(item)

    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=["ws1_id", "ws2_id"],
        workspace_index=0,
    )

    # Checkpoint call 1 — workspace 1
    checkpoint = _run_once(checkpoint)
    assert checkpoint.workspace_index == 1
    assert checkpoint.has_more is True

    # Checkpoint call 2 — workspace 2
    checkpoint = _run_once(checkpoint)

    assert len(all_docs) == 2
    assert all_docs[0].semantic_identifier == "WS1 Call"
    assert all_docs[1].semantic_identifier == "WS2 Call"
    assert checkpoint.has_more is False
    assert checkpoint.workspace_index == 2
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
def test_empty_workspace_404(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """A 404 from the transcript API means no calls, so the workspace is done.

    With a single workspace configured, the returned checkpoint should have
    moved past it (``workspace_index == 1``) and report ``has_more`` False.
    """
    mock_request.return_value = MagicMock(status_code=404)

    start = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )

    run = connector.load_from_checkpoint(0, time.time(), start)
    while True:
        try:
            next(run)
        except StopIteration as finished:
            # The generator's return value is the updated checkpoint.
            final = finished.value
            break

    assert final.has_more is False
    assert final.workspace_index == 1
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
def test_retry_only_fetches_missing_ids(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """The call-details retry must ask only for the IDs still missing.

    Two transcripts are returned, but the first details response covers only
    ``call1``. The follow-up request is expected to carry just ``call2`` in
    its ``callIds`` filter, and both documents should still be produced.
    """
    transcripts = MagicMock(status_code=200)
    transcripts.json.return_value = {
        "callTranscripts": [
            _make_transcript("call1"),
            _make_transcript("call2"),
        ],
        "records": {},
    }

    # First fetch: returns call1 but not call2
    first_details = MagicMock(status_code=200)
    first_details.json.return_value = {
        "calls": [_make_call_detail("call1", "Call One")]
    }

    # Second fetch (retry): returns call2
    retry_details = MagicMock(status_code=200)
    retry_details.json.return_value = {
        "calls": [_make_call_detail("call2", "Call Two")]
    }

    mock_request.side_effect = [transcripts, first_details, retry_details]

    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )

    # The returned checkpoint is not needed here, so plain iteration suffices.
    with patch("onyx.connectors.gong.connector.time.sleep"):
        produced = connector.load_from_checkpoint(0, time.time(), checkpoint)
        docs: list[Document] = [
            item for item in produced if isinstance(item, Document)
        ]

    assert len(docs) == 2
    assert docs[0].semantic_identifier == "Call One"
    assert docs[1].semantic_identifier == "Call Two"

    # Verify: 3 API calls total (1 transcript + 1 full details + 1 retry for missing only)
    assert mock_request.call_count == 3
    # The retry call should only request call2, not both
    retry_body = mock_request.call_args_list[2].kwargs["json"]
    assert retry_body["filter"]["callIds"] == ["call2"]
|
||||
|
||||
@patch.object(GongConnector, "_throttled_request")
def test_expired_cursor_restarts_workspace(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """An expired pagination cursor resets the checkpoint for the workspace.

    The API answers 400 with a "cursor has expired" error while the
    checkpoint still holds a cursor. No documents should be produced, and
    the returned checkpoint should keep the same workspace index with the
    cursor cleared and ``has_more`` still True.
    """
    mock_request.return_value = MagicMock(
        status_code=400,
        ok=False,
        text='{"requestId":"abc","errors":["cursor has expired"]}',
    )

    # Checkpoint mid-pagination with a (now-expired) cursor
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
        cursor="stale-cursor",
    )

    docs: list[Document] = []
    run = connector.load_from_checkpoint(0, time.time(), checkpoint)
    while True:
        try:
            item = next(run)
        except StopIteration as finished:
            # The generator's return value is the updated checkpoint.
            checkpoint = finished.value
            break
        if isinstance(item, Document):
            docs.append(item)

    assert len(docs) == 0
    # Cursor reset so next call restarts the workspace from scratch
    assert checkpoint.cursor is None
    assert checkpoint.workspace_index == 0
    assert checkpoint.has_more is True
|
||||
@@ -12,6 +12,10 @@ dependency on pypdf internals (pypdf.generic).
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.file_processing import extract_file_text
|
||||
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
|
||||
from onyx.file_processing.extract_file_text import pdf_to_text
|
||||
from onyx.file_processing.extract_file_text import read_pdf_file
|
||||
from onyx.file_processing.password_validation import is_pdf_protected
|
||||
@@ -96,6 +100,80 @@ class TestReadPdfFile:
|
||||
# Returned list is empty when callback is used
|
||||
assert images == []
|
||||
|
||||
def test_image_cap_skips_images_above_limit(
    self, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Images beyond the embedded-image cap are skipped.

    The cap protects the user-file-processing worker from OOMing on PDFs
    with thousands of embedded images. With the cap patched to 0, no images
    should be extracted even though the fixture contains one.
    """
    monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)

    pdf = _load("with_image.pdf")
    _first, _second, extracted = read_pdf_file(pdf, extract_images=True)

    assert extracted == []
|
||||
|
||||
def test_image_cap_at_limit_extracts_up_to_cap(
    self, monkeypatch: pytest.MonkeyPatch
) -> None:
    """With a cap at or above the image count, every image is still extracted."""
    monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 100)

    pdf = _load("with_image.pdf")
    _first, _second, extracted = read_pdf_file(pdf, extract_images=True)

    assert len(extracted) == 1
|
||||
|
||||
def test_image_cap_with_callback_stops_streaming_at_limit(
    self, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The image cap applies to the streaming callback path as well.

    With the cap patched to 0, the callback must never be invoked.
    """
    monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)

    received: list[tuple[bytes, str]] = []

    read_pdf_file(
        _load("with_image.pdf"),
        extract_images=True,
        # Record each (data, name) pair the reader streams out.
        image_callback=lambda data, name: received.append((data, name)),
    )

    assert received == []
|
||||
|
||||
|
||||
# ── count_pdf_embedded_images ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCountPdfEmbeddedImages:
    """Checks for ``count_pdf_embedded_images`` against the PDF fixtures."""

    def test_returns_count_for_normal_pdf(self) -> None:
        pdf = _load("with_image.pdf")
        assert count_pdf_embedded_images(pdf, cap=10) == 1

    def test_short_circuits_above_cap(self) -> None:
        # with_image.pdf has 1 image. cap=0 means "anything > 0 is over cap" —
        # function returns on first increment as the over-cap sentinel.
        pdf = _load("with_image.pdf")
        assert count_pdf_embedded_images(pdf, cap=0) == 1

    def test_returns_zero_for_pdf_without_images(self) -> None:
        pdf = _load("simple.pdf")
        assert count_pdf_embedded_images(pdf, cap=10) == 0

    def test_returns_zero_for_invalid_pdf(self) -> None:
        not_a_pdf = BytesIO(b"not a pdf")
        assert count_pdf_embedded_images(not_a_pdf, cap=10) == 0

    def test_returns_zero_for_password_locked_pdf(self) -> None:
        # encrypted.pdf has an open password; we can't inspect without it, so
        # the helper returns 0 — callers rely on the password-protected check
        # that runs earlier in the upload pipeline.
        pdf = _load("encrypted.pdf")
        assert count_pdf_embedded_images(pdf, cap=10) == 0

    def test_inspects_owner_password_only_pdf(self) -> None:
        # owner_protected.pdf is encrypted but has no open password. It should
        # decrypt with an empty string and count images normally. The fixture
        # has zero images, so 0 is a real count (not the "bail on encrypted"
        # path).
        pdf = _load("owner_protected.pdf")
        assert count_pdf_embedded_images(pdf, cap=10) == 0

    def test_preserves_file_position(self) -> None:
        # The stream offset must be the same after counting as before it.
        start_offset = 42
        pdf = _load("with_image.pdf")
        pdf.seek(start_offset)
        count_pdf_embedded_images(pdf, cap=10)
        assert pdf.tell() == start_offset
|
||||
|
||||
|
||||
# ── pdf_to_text ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
"""Tests for generic Celery task lifecycle Prometheus metrics."""
|
||||
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.background.celery.apps.app_base import on_before_task_publish
|
||||
from onyx.server.metrics.celery_task_metrics import _task_start_times
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
|
||||
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
|
||||
from onyx.server.metrics.celery_task_metrics import TASK_COMPLETED
|
||||
from onyx.server.metrics.celery_task_metrics import TASK_DURATION
|
||||
from onyx.server.metrics.celery_task_metrics import TASK_QUEUE_WAIT
|
||||
from onyx.server.metrics.celery_task_metrics import TASK_STARTED
|
||||
from onyx.server.metrics.celery_task_metrics import TASKS_ACTIVE
|
||||
|
||||
@@ -22,11 +25,18 @@ def reset_metrics() -> Iterator[None]:
|
||||
_task_start_times.clear()
|
||||
|
||||
|
||||
def _make_task(name: str = "test_task", queue: str = "test_queue") -> MagicMock:
|
||||
def _make_task(
|
||||
name: str = "test_task",
|
||||
queue: str = "test_queue",
|
||||
enqueued_at: float | None = None,
|
||||
) -> MagicMock:
|
||||
task = MagicMock()
|
||||
task.name = name
|
||||
task.request = MagicMock()
|
||||
task.request.delivery_info = {"routing_key": queue}
|
||||
task.request.headers = (
|
||||
{"enqueued_at": enqueued_at} if enqueued_at is not None else {}
|
||||
)
|
||||
return task
|
||||
|
||||
|
||||
@@ -72,6 +82,35 @@ class TestCeleryTaskPrerun:
|
||||
on_celery_task_prerun("task-1", task)
|
||||
assert "task-1" in _task_start_times
|
||||
|
||||
def test_observes_queue_wait_when_enqueued_at_present(self) -> None:
    """A task stamped 30s in the past adds at least 30 to the queue-wait sum."""

    def _queue_wait_sum() -> float:
        """Read the running sum of the TASK_QUEUE_WAIT series for the test task."""
        return TASK_QUEUE_WAIT.labels(
            task_name="test_task", queue="test_queue"
        )._sum.get()

    published_at = time.time() - 30  # simulates 30s wait
    task = _make_task(enqueued_at=published_at)

    sum_before = _queue_wait_sum()
    on_celery_task_prerun("task-1", task)
    sum_after = _queue_wait_sum()

    assert sum_after >= sum_before + 30
|
||||
|
||||
def test_skips_queue_wait_when_enqueued_at_missing(self) -> None:
    """Without an ``enqueued_at`` header the queue-wait sum is left untouched."""

    def _queue_wait_sum() -> float:
        """Read the running sum of the TASK_QUEUE_WAIT series for the test task."""
        return TASK_QUEUE_WAIT.labels(
            task_name="test_task", queue="test_queue"
        )._sum.get()

    task = _make_task()  # no enqueued_at in headers

    sum_before = _queue_wait_sum()
    on_celery_task_prerun("task-2", task)
    sum_after = _queue_wait_sum()

    assert sum_after == sum_before
|
||||
|
||||
|
||||
class TestCeleryTaskPostrun:
|
||||
def test_increments_completed_success(self) -> None:
|
||||
@@ -151,3 +190,15 @@ class TestCeleryTaskPostrun:
|
||||
task = _make_task()
|
||||
on_celery_task_postrun("task-1", task, "SUCCESS")
|
||||
# Should not raise
|
||||
|
||||
|
||||
class TestBeforeTaskPublish:
    """Checks for the ``on_before_task_publish`` signal handler."""

    def test_stamps_enqueued_at_into_headers(self) -> None:
        """The handler writes an ``enqueued_at`` value no earlier than the call."""
        message_headers: dict = {}
        earliest = time.time()

        on_before_task_publish(headers=message_headers)

        assert "enqueued_at" in message_headers
        assert message_headers["enqueued_at"] >= earliest

    def test_noop_when_headers_is_none(self) -> None:
        """Passing ``headers=None`` must not raise."""
        on_before_task_publish(headers=None)  # should not raise
|
||||
|
||||
204
backend/tests/unit/server/metrics/test_deletion_metrics.py
Normal file
204
backend/tests/unit/server/metrics/test_deletion_metrics.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""Tests for deletion-specific Prometheus metrics."""
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.server.metrics.deletion_metrics import DELETION_BLOCKED
|
||||
from onyx.server.metrics.deletion_metrics import DELETION_COMPLETED
|
||||
from onyx.server.metrics.deletion_metrics import DELETION_FENCE_RESET
|
||||
from onyx.server.metrics.deletion_metrics import DELETION_STARTED
|
||||
from onyx.server.metrics.deletion_metrics import DELETION_TASKSET_DURATION
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_blocked
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_completed
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_fence_reset
|
||||
from onyx.server.metrics.deletion_metrics import inc_deletion_started
|
||||
from onyx.server.metrics.deletion_metrics import observe_deletion_taskset_duration
|
||||
|
||||
|
||||
class TestIncDeletionStarted:
    """Checks for ``inc_deletion_started`` and the DELETION_STARTED counter."""

    @staticmethod
    def _started(tenant_id: str) -> float:
        """Read the current DELETION_STARTED value for ``tenant_id``."""
        return DELETION_STARTED.labels(tenant_id=tenant_id)._value.get()

    def test_increments_counter(self) -> None:
        baseline = self._started("t1")

        inc_deletion_started("t1")

        assert self._started("t1") == baseline + 1

    def test_labels_by_tenant(self) -> None:
        baseline_t1 = self._started("t1")
        baseline_t2 = self._started("t2")

        inc_deletion_started("t1")

        # Only the tenant that was incremented may move.
        assert self._started("t1") == baseline_t1 + 1
        assert self._started("t2") == baseline_t2

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        def _explode(**_labels: str) -> None:
            raise RuntimeError("boom")

        # Make every labels() lookup fail; the helper must not propagate it.
        monkeypatch.setattr(DELETION_STARTED, "labels", _explode)
        inc_deletion_started("t1")
|
||||
|
||||
|
||||
class TestIncDeletionCompleted:
    """Checks for ``inc_deletion_completed`` and the DELETION_COMPLETED counter."""

    @staticmethod
    def _completed(tenant_id: str, outcome: str) -> float:
        """Read the current DELETION_COMPLETED value for the label pair."""
        return DELETION_COMPLETED.labels(
            tenant_id=tenant_id, outcome=outcome
        )._value.get()

    def test_increments_counter(self) -> None:
        baseline = self._completed("t1", "success")

        inc_deletion_completed("t1", "success")

        assert self._completed("t1", "success") == baseline + 1

    def test_labels_by_outcome(self) -> None:
        baseline_success = self._completed("t1", "success")
        baseline_failure = self._completed("t1", "failure")

        inc_deletion_completed("t1", "success")

        # Only the outcome that was incremented may move.
        assert self._completed("t1", "success") == baseline_success + 1
        assert self._completed("t1", "failure") == baseline_failure

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        def _explode(**_labels: str) -> None:
            raise RuntimeError("boom")

        # Make every labels() lookup fail; the helper must not propagate it.
        monkeypatch.setattr(DELETION_COMPLETED, "labels", _explode)
        inc_deletion_completed("t1", "success")
|
||||
|
||||
|
||||
class TestObserveDeletionTasksetDuration:
    """Checks for ``observe_deletion_taskset_duration`` and its histogram."""

    @staticmethod
    def _duration_sum(tenant_id: str, outcome: str) -> float:
        """Read the running sum of DELETION_TASKSET_DURATION for the label pair."""
        return DELETION_TASKSET_DURATION.labels(
            tenant_id=tenant_id, outcome=outcome
        )._sum.get()

    def test_observes_duration(self) -> None:
        baseline = self._duration_sum("t1", "success")

        observe_deletion_taskset_duration("t1", "success", 120.0)

        assert self._duration_sum("t1", "success") == pytest.approx(baseline + 120.0)

    def test_labels_by_tenant(self) -> None:
        baseline_t1 = self._duration_sum("t1", "success")
        baseline_t2 = self._duration_sum("t2", "success")

        observe_deletion_taskset_duration("t1", "success", 60.0)

        # Only the tenant that was observed may move.
        assert self._duration_sum("t1", "success") == pytest.approx(
            baseline_t1 + 60.0
        )
        assert self._duration_sum("t2", "success") == pytest.approx(baseline_t2)

    def test_labels_by_outcome(self) -> None:
        baseline_success = self._duration_sum("t1", "success")
        baseline_failure = self._duration_sum("t1", "failure")

        observe_deletion_taskset_duration("t1", "failure", 45.0)

        # Only the outcome that was observed may move.
        assert self._duration_sum("t1", "success") == pytest.approx(baseline_success)
        assert self._duration_sum("t1", "failure") == pytest.approx(
            baseline_failure + 45.0
        )

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        def _explode(**_labels: str) -> None:
            raise RuntimeError("boom")

        # Make every labels() lookup fail; the helper must not propagate it.
        monkeypatch.setattr(DELETION_TASKSET_DURATION, "labels", _explode)
        observe_deletion_taskset_duration("t1", "success", 10.0)
|
||||
|
||||
|
||||
class TestIncDeletionBlocked:
    """Checks for ``inc_deletion_blocked`` and the DELETION_BLOCKED counter."""

    @staticmethod
    def _blocked(tenant_id: str, blocker: str) -> float:
        """Read the current DELETION_BLOCKED value for the label pair."""
        return DELETION_BLOCKED.labels(
            tenant_id=tenant_id, blocker=blocker
        )._value.get()

    def test_increments_counter(self) -> None:
        baseline = self._blocked("t1", "indexing")

        inc_deletion_blocked("t1", "indexing")

        assert self._blocked("t1", "indexing") == baseline + 1

    def test_labels_by_blocker(self) -> None:
        baseline_indexing = self._blocked("t1", "indexing")
        baseline_pruning = self._blocked("t1", "pruning")

        inc_deletion_blocked("t1", "indexing")

        # Only the blocker that was incremented may move.
        assert self._blocked("t1", "indexing") == baseline_indexing + 1
        assert self._blocked("t1", "pruning") == baseline_pruning

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        def _explode(**_labels: str) -> None:
            raise RuntimeError("boom")

        # Make every labels() lookup fail; the helper must not propagate it.
        monkeypatch.setattr(DELETION_BLOCKED, "labels", _explode)
        inc_deletion_blocked("t1", "indexing")
|
||||
|
||||
|
||||
class TestIncDeletionFenceReset:
    """Checks for ``inc_deletion_fence_reset`` and the DELETION_FENCE_RESET counter."""

    @staticmethod
    def _fence_resets(tenant_id: str) -> float:
        """Read the current DELETION_FENCE_RESET value for ``tenant_id``."""
        return DELETION_FENCE_RESET.labels(tenant_id=tenant_id)._value.get()

    def test_increments_counter(self) -> None:
        baseline = self._fence_resets("t1")

        inc_deletion_fence_reset("t1")

        assert self._fence_resets("t1") == baseline + 1

    def test_labels_by_tenant(self) -> None:
        baseline_t1 = self._fence_resets("t1")
        baseline_t2 = self._fence_resets("t2")

        inc_deletion_fence_reset("t1")

        # Only the tenant that was incremented may move.
        assert self._fence_resets("t1") == baseline_t1 + 1
        assert self._fence_resets("t2") == baseline_t2

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        def _explode(**_labels: str) -> None:
            raise RuntimeError("boom")

        # Make every labels() lookup fail; the helper must not propagate it.
        monkeypatch.setattr(DELETION_FENCE_RESET, "labels", _explode)
        inc_deletion_fence_reset("t1")
|
||||
@@ -1,16 +1,11 @@
|
||||
"""Tests for indexing pipeline Prometheus collectors."""
|
||||
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
|
||||
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
|
||||
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
|
||||
|
||||
|
||||
@@ -18,7 +13,7 @@ from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
|
||||
def _mock_broker_client() -> Iterator[None]:
    """Patch celery_get_broker_client for all collector tests.

    The name is patched in ``onyx.server.metrics.indexing_pipeline`` (the
    module under test) rather than in the module that defines it, and is
    replaced by a callable returning a ``MagicMock``. The patch is active
    for as long as the generator is suspended at the ``yield``.
    """
    with patch(
        "onyx.server.metrics.indexing_pipeline.celery_get_broker_client",
        return_value=MagicMock(),
    ):
        yield
|
||||
@@ -137,212 +132,3 @@ class TestQueueDepthCollector:
|
||||
stale_result = collector.collect()
|
||||
|
||||
assert stale_result is good_result
|
||||
|
||||
|
||||
class TestIndexAttemptCollector:
    """Checks for ``IndexAttemptCollector``."""

    def test_returns_empty_when_not_configured(self) -> None:
        unconfigured = IndexAttemptCollector()
        assert unconfigured.collect() == []

    def test_returns_empty_describe(self) -> None:
        unconfigured = IndexAttemptCollector()
        assert unconfigured.describe() == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_collects_index_attempts(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        """One grouped row becomes one labelled sample on the active-attempts gauge."""
        from onyx.db.enums import IndexingStatus

        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = ["public"]

        # The session factory is used as a context manager yielding the session.
        session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        row = (
            IndexingStatus.IN_PROGRESS,
            MagicMock(value="web"),
            81,
            "Table Tennis Blade Guide",
            2,
        )
        # Mirror the query chain: query().join().join().filter().group_by().all()
        grouped_query = (
            session.query.return_value.join.return_value.join.return_value
        ).filter.return_value.group_by.return_value
        grouped_query.all.return_value = [row]

        families = collector.collect()

        assert len(families) == 1
        gauge = families[0]
        assert gauge.name == "onyx_index_attempts_active"
        assert len(gauge.samples) == 1
        sample = gauge.samples[0]
        assert sample.labels == {
            "status": "in_progress",
            "source": "web",
            "tenant_id": "public",
            "connector_name": "Table Tennis Blade Guide",
            "cc_pair_id": "81",
        }
        assert sample.value == 2

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_handles_db_error_gracefully(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        """A failing tenant lookup yields an empty result instead of raising."""
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.side_effect = Exception("DB down")

        # No stale cache, so returns empty
        assert collector.collect() == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_skips_none_tenant_ids(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        """A ``None`` tenant id contributes no samples."""
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = [None]

        families = collector.collect()
        assert len(families) == 1  # Returns the gauge family, just with no samples
        assert len(families[0].samples) == 0
|
||||
|
||||
|
||||
class TestConnectorHealthCollector:
    """Checks for ``ConnectorHealthCollector``."""

    def test_returns_empty_when_not_configured(self) -> None:
        unconfigured = ConnectorHealthCollector()
        assert unconfigured.collect() == []

    def test_returns_empty_describe(self) -> None:
        unconfigured = ConnectorHealthCollector()
        assert unconfigured.describe() == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_collects_connector_health(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        """A single connector row feeds all six connector-health families."""
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = ["public"]

        # The session factory is used as a context manager yielding the session.
        session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        current_time = datetime.now(tz=timezone.utc)
        two_hours_ago = current_time - timedelta(hours=2)

        status = MagicMock(value="ACTIVE")
        source = MagicMock(value="google_drive")
        # Row: (id, status, in_error, last_success, name, source)
        row = (
            42,
            status,
            True,  # in_repeated_error_state
            two_hours_ago,
            "My GDrive Connector",
            source,
        )
        session.query.return_value.join.return_value.all.return_value = [row]

        # Mock the index attempt queries (error counts + docs counts)
        attempts_query = session.query.return_value.filter.return_value
        attempts_query.group_by.return_value.all.return_value = []

        families = collector.collect()

        def _family(name: str) -> Any:
            """Return the first collected family called ``name``."""
            return next(f for f in families if f.name == name)

        assert len(families) == 6
        assert {f.name for f in families} == {
            "onyx_connector_last_success_age_seconds",
            "onyx_connector_in_error_state",
            "onyx_connectors_by_status",
            "onyx_connectors_in_error_total",
            "onyx_connector_docs_indexed",
            "onyx_connector_error_count",
        }

        staleness = _family("onyx_connector_last_success_age_seconds")
        assert len(staleness.samples) == 1
        # 2 hours == 7200 seconds; allow a few seconds of test runtime.
        assert staleness.samples[0].value == pytest.approx(7200, abs=5)

        error_state = _family("onyx_connector_in_error_state")
        assert error_state.samples[0].value == 1.0

        by_status = _family("onyx_connectors_by_status")
        assert by_status.samples[0].labels == {
            "tenant_id": "public",
            "status": "ACTIVE",
        }
        assert by_status.samples[0].value == 1

        error_total = _family("onyx_connectors_in_error_total")
        assert error_total.samples[0].value == 1

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_skips_staleness_when_no_last_success(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        """A connector without a last-success time gets no staleness sample."""
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.return_value = ["public"]

        # The session factory is used as a context manager yielding the session.
        session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)

        status = MagicMock(value="INITIAL_INDEXING")
        source = MagicMock(value="slack")
        row = (
            10,
            status,
            False,
            None,  # no last_successful_index_time
            0,
            source,
        )
        session.query.return_value.join.return_value.all.return_value = [row]

        families = collector.collect()

        staleness = next(
            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
        )
        assert len(staleness.samples) == 0

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_handles_db_error_gracefully(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        """A failing tenant lookup yields an empty result instead of raising."""
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()

        mock_get_tenants.side_effect = Exception("DB down")

        assert collector.collect() == []
|
||||
|
||||
@@ -129,12 +129,36 @@ class TestWorkerHealthCollector:
|
||||
up = families[1]
|
||||
assert up.name == "onyx_celery_worker_up"
|
||||
assert len(up.samples) == 3
|
||||
# Labels use short names (before @)
|
||||
labels = {s.labels["worker"] for s in up.samples}
|
||||
assert labels == {"primary", "docfetching", "monitoring"}
|
||||
label_pairs = {
|
||||
(s.labels["worker_type"], s.labels["hostname"]) for s in up.samples
|
||||
}
|
||||
assert label_pairs == {
|
||||
("primary", "host1"),
|
||||
("docfetching", "host1"),
|
||||
("monitoring", "host1"),
|
||||
}
|
||||
for sample in up.samples:
|
||||
assert sample.value == 1
|
||||
|
||||
def test_replicas_of_same_worker_type_are_distinct(self) -> None:
    """Regression: replicas of one worker type must stay separate samples.

    ``docprocessing@pod-1``, ``docprocessing@pod-2`` and
    ``docprocessing@pod-3`` should each produce their own sample rather than
    collapsing into a single duplicate-timestamp series.
    """
    monitor = WorkerHeartbeatMonitor(MagicMock())
    for replica in (
        "docprocessing@pod-1",
        "docprocessing@pod-2",
        "docprocessing@pod-3",
    ):
        monitor._on_heartbeat({"hostname": replica})

    collector = WorkerHealthCollector(cache_ttl=0)
    collector.set_monitor(monitor)

    families = collector.collect()
    up = families[1]

    assert len(up.samples) == 3
    seen_hosts = {sample.labels["hostname"] for sample in up.samples}
    assert seen_hosts == {"pod-1", "pod-2", "pod-3"}
    assert all(
        sample.labels["worker_type"] == "docprocessing" for sample in up.samples
    )
|
||||
|
||||
def test_reports_dead_worker(self) -> None:
|
||||
monitor = WorkerHeartbeatMonitor(MagicMock())
|
||||
monitor._on_heartbeat({"hostname": "primary@host1"})
|
||||
@@ -151,9 +175,9 @@ class TestWorkerHealthCollector:
|
||||
assert active.samples[0].value == 1
|
||||
|
||||
up = families[1]
|
||||
samples_by_name = {s.labels["worker"]: s.value for s in up.samples}
|
||||
assert samples_by_name["primary"] == 1
|
||||
assert samples_by_name["monitoring"] == 0
|
||||
samples_by_type = {s.labels["worker_type"]: s.value for s in up.samples}
|
||||
assert samples_by_type["primary"] == 1
|
||||
assert samples_by_type["monitoring"] == 0
|
||||
|
||||
def test_empty_monitor_returns_zero(self) -> None:
|
||||
monitor = WorkerHeartbeatMonitor(MagicMock())
|
||||
|
||||
@@ -217,11 +217,23 @@ Enriches docfetching and docprocessing tasks with connector-level labels. Silent
|
||||
| `onyx_indexing_task_completed_total` | Counter | `task_name`, `source`, `tenant_id`, `cc_pair_id`, `outcome` | Indexing tasks completed per connector |
|
||||
| `onyx_indexing_task_duration_seconds` | Histogram | `task_name`, `source`, `tenant_id` | Indexing task duration by connector type |
|
||||
|
||||
`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string). The pull-based collectors on the monitoring worker include it since they have bounded cardinality (one series per connector).
|
||||
`connector_name` is intentionally excluded from these per-task counters to avoid unbounded cardinality (it's a free-form user string).
|
||||
|
||||
### Connector Health Metrics (`onyx.server.metrics.connector_health_metrics`)
|
||||
|
||||
Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Scales to any number of tenants (no schema iteration). Unlike the per-task counters above, these include `connector_name` because their cardinality is bounded by the number of connectors (one series per connector), not by the number of task executions.
|
||||
|
||||
| Metric | Type | Labels | Description |
|
||||
| ----------------------------------------------- | ------- | --------------------------------------------------------------- | ------------------------------------------------------------- |
|
||||
| `onyx_index_attempt_transitions_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`, `status` | Index attempt status transitions (in_progress, success, etc.) |
|
||||
| `onyx_connector_in_error_state` | Gauge | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Whether connector is in repeated error state (1=yes, 0=no) |
|
||||
| `onyx_connector_last_success_timestamp_seconds` | Gauge | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Unix timestamp of last successful indexing |
|
||||
| `onyx_connector_docs_indexed_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Total documents indexed per connector (monotonic) |
|
||||
| `onyx_connector_indexing_errors_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Total failed index attempts per connector (monotonic) |
|
||||
|
||||
### Pull-Based Collectors (`onyx.server.metrics.indexing_pipeline`)
|
||||
|
||||
Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at scrape time with a 30-second TTL cache.
|
||||
Registered only in the **Monitoring** worker. Collectors query Redis at scrape time with a 30-second TTL cache and a 120-second timeout to prevent the `/metrics` endpoint from hanging.
|
||||
|
||||
| Metric | Type | Labels | Description |
|
||||
| ------------------------------------ | ----- | ------- | ----------------------------------- |
|
||||
@@ -229,8 +241,6 @@ Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at
|
||||
| `onyx_queue_unacked` | Gauge | `queue` | Unacknowledged messages per queue |
|
||||
| `onyx_queue_oldest_task_age_seconds` | Gauge | `queue` | Age of the oldest task in the queue |
|
||||
|
||||
Plus additional connector health, index attempt, and worker heartbeat metrics — see `indexing_pipeline.py` for the full list.
|
||||
|
||||
### Adding Metrics to a Worker
|
||||
|
||||
Currently only the docfetching and docprocessing workers have push-based task metrics wired up. To add metrics to another worker (e.g. heavy, light, primary):
|
||||
|
||||
@@ -12,7 +12,7 @@ dependencies = [
|
||||
"cohere==5.6.1",
|
||||
"fastapi==0.133.1",
|
||||
"google-genai==1.52.0",
|
||||
"litellm==1.81.6",
|
||||
"litellm[google]==1.81.6",
|
||||
"openai==2.14.0",
|
||||
"pydantic==2.11.7",
|
||||
"prometheus_client>=0.21.1",
|
||||
|
||||
229
uv.lock
generated
229
uv.lock
generated
@@ -2115,6 +2115,12 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/d4/90197b416cb61cefd316964fd9e7bd8324bcbafabf40eef14a9f20b81974/google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c", size = 173706, upload-time = "2025-10-28T21:34:50.151Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
grpc = [
|
||||
{ name = "grpcio" },
|
||||
{ name = "grpcio-status" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-api-python-client"
|
||||
version = "2.86.0"
|
||||
@@ -2172,6 +2178,124 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4a/07/8d9a8186e6768b55dfffeb57c719bc03770cf8a970a074616ae6f9e26a57/google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb", size = 18926, upload-time = "2023-02-07T20:53:18.837Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-aiplatform"
|
||||
version = "1.133.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "docstring-parser" },
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-cloud-bigquery" },
|
||||
{ name = "google-cloud-resource-manager" },
|
||||
{ name = "google-cloud-storage" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "packaging" },
|
||||
{ name = "proto-plus" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d4/be/31ce7fd658ddebafbe5583977ddee536b2bacc491ad10b5a067388aec66f/google_cloud_aiplatform-1.133.0.tar.gz", hash = "sha256:3a6540711956dd178daaab3c2c05db476e46d94ac25912b8cf4f59b00b058ae0", size = 9921309, upload-time = "2026-01-08T22:11:25.079Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/01/5b/ef74ff65aebb74eaba51078e33ddd897247ba0d1197fd5a7953126205519/google_cloud_aiplatform-1.133.0-py2.py3-none-any.whl", hash = "sha256:dfc81228e987ca10d1c32c7204e2131b3c8d6b7c8e0b4e23bf7c56816bc4c566", size = 8184595, upload-time = "2026-01-08T22:11:22.067Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-bigquery"
|
||||
version = "3.41.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-cloud-core" },
|
||||
{ name = "google-resumable-media" },
|
||||
{ name = "packaging" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ce/13/6515c7aab55a4a0cf708ffd309fb9af5bab54c13e32dc22c5acd6497193c/google_cloud_bigquery-3.41.0.tar.gz", hash = "sha256:2217e488b47ed576360c9b2cc07d59d883a54b83167c0ef37f915c26b01a06fe", size = 513434, upload-time = "2026-03-30T22:50:55.347Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/40/33/1d3902efadef9194566d499d61507e1f038454e0b55499d2d7f8ab2a4fee/google_cloud_bigquery-3.41.0-py3-none-any.whl", hash = "sha256:2a5b5a737b401cbd824a6e5eac7554100b878668d908e6548836b5d8aaa4dcaa", size = 262343, upload-time = "2026-03-30T22:48:45.444Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-core"
|
||||
version = "2.5.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core" },
|
||||
{ name = "google-auth" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/dc/24/6ca08b0a03c7b0c620427503ab00353a4ae806b848b93bcea18b6b76fde6/google_cloud_core-2.5.1.tar.gz", hash = "sha256:3dc94bdec9d05a31d9f355045ed0f369fbc0d8c665076c734f065d729800f811", size = 36078, upload-time = "2026-03-30T22:50:08.057Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/73/d9/5bb050cb32826466aa9b25f79e2ca2879fe66cb76782d4ed798dd7506151/google_cloud_core-2.5.1-py3-none-any.whl", hash = "sha256:ea62cdf502c20e3e14be8a32c05ed02113d7bef454e40ff3fab6fe1ec9f1f4e7", size = 29452, upload-time = "2026-03-30T22:48:31.567Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-resource-manager"
|
||||
version = "1.17.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
{ name = "google-auth" },
|
||||
{ name = "grpc-google-iam-v1" },
|
||||
{ name = "grpcio" },
|
||||
{ name = "proto-plus" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b2/1a/13060cabf553d52d151d2afc26b39561e82853380d499dd525a0d422d9f0/google_cloud_resource_manager-1.17.0.tar.gz", hash = "sha256:0f486b62e2c58ff992a3a50fa0f4a96eef7750aa6c971bb373398ccb91828660", size = 464971, upload-time = "2026-03-26T22:17:29.204Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/f7/661d7a9023e877a226b5683429c3662f75a29ef45cb1464cf39adb689218/google_cloud_resource_manager-1.17.0-py3-none-any.whl", hash = "sha256:e479baf4b014a57f298e01b8279e3290b032e3476d69c8e5e1427af8f82739a5", size = 404403, upload-time = "2026-03-26T22:15:26.57Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-storage"
|
||||
version = "3.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core" },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-cloud-core" },
|
||||
{ name = "google-crc32c" },
|
||||
{ name = "google-resumable-media" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4c/47/205eb8e9a1739b5345843e5a425775cbdc472cc38e7eda082ba5b8d02450/google_cloud_storage-3.10.1.tar.gz", hash = "sha256:97db9aa4460727982040edd2bd13ff3d5e2260b5331ad22895802da1fc2a5286", size = 17309950, upload-time = "2026-03-23T09:35:23.409Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/ff/ca9ab2417fa913d75aae38bf40bf856bb2749a604b2e0f701b37cfcd23cc/google_cloud_storage-3.10.1-py3-none-any.whl", hash = "sha256:a72f656759b7b99bda700f901adcb3425a828d4a29f911bc26b3ea79c5b1217f", size = 324453, upload-time = "2026-03-23T09:35:21.368Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-crc32c"
|
||||
version = "1.8.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/ef/21ccfaab3d5078d41efe8612e0ed0bfc9ce22475de074162a91a25f7980d/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8", size = 31298, upload-time = "2025-12-16T00:20:32.241Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/b8/f8413d3f4b676136e965e764ceedec904fe38ae8de0cdc52a12d8eb1096e/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7", size = 30872, upload-time = "2025-12-16T00:33:58.785Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/fd/33aa4ec62b290477181c55bb1c9302c9698c58c0ce9a6ab4874abc8b0d60/google_crc32c-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15", size = 33243, upload-time = "2025-12-16T00:40:21.46Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/03/4820b3bd99c9653d1a5210cb32f9ba4da9681619b4d35b6a052432df4773/google_crc32c-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a", size = 33608, upload-time = "2025-12-16T00:40:22.204Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/43/acf61476a11437bf9733fb2f70599b1ced11ec7ed9ea760fdd9a77d0c619/google_crc32c-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2", size = 34439, upload-time = "2025-12-16T00:35:20.458Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297, upload-time = "2025-12-16T00:23:20.709Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867, upload-time = "2025-12-16T00:43:14.628Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344, upload-time = "2025-12-16T00:40:24.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694, upload-time = "2025-12-16T00:40:25.505Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435, upload-time = "2025-12-16T00:35:22.107Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301, upload-time = "2025-12-16T00:24:48.527Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868, upload-time = "2025-12-16T00:48:12.163Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381, upload-time = "2025-12-16T00:40:26.268Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734, upload-time = "2025-12-16T00:40:27.028Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878, upload-time = "2025-12-16T00:35:23.142Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/c5/c171e4d8c44fec1422d801a6d2e5d7ddabd733eeda505c79730ee9607f07/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93", size = 28615, upload-time = "2025-12-16T00:40:29.298Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/97/7d75fe37a7a6ed171a2cf17117177e7aab7e6e0d115858741b41e9dd4254/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c", size = 28800, upload-time = "2025-12-16T00:40:30.322Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.52.0"
|
||||
@@ -2191,6 +2315,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-resumable-media"
|
||||
version = "2.8.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-crc32c" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3f/d1/b1ea14b93b6b78f57fc580125de44e9f593ab88dd2460f1a8a8d18f74754/google_resumable_media-2.8.2.tar.gz", hash = "sha256:f3354a182ebd193ae3f42e3ef95e6c9b10f128320de23ac7637236713b1acd70", size = 2164510, upload-time = "2026-03-30T23:34:25.369Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/f8/50bfaf4658431ff9de45c5c3935af7ab01157a4903c603cd0eee6e78e087/google_resumable_media-2.8.2-py3-none-any.whl", hash = "sha256:82b6d8ccd11765268cdd2a2123f417ec806b8eef3000a9a38dfe3033da5fb220", size = 81511, upload-time = "2026-03-30T23:34:09.671Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "googleapis-common-protos"
|
||||
version = "1.72.0"
|
||||
@@ -2203,6 +2339,11 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
grpc = [
|
||||
{ name = "grpcio" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "greenlet"
|
||||
version = "3.2.4"
|
||||
@@ -2253,6 +2394,85 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpc-google-iam-v1"
|
||||
version = "0.14.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "googleapis-common-protos", extra = ["grpc"] },
|
||||
{ name = "grpcio" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/44/4f/d098419ad0bfc06c9ce440575f05aa22d8973b6c276e86ac7890093d3c37/grpc_google_iam_v1-0.14.4.tar.gz", hash = "sha256:392b3796947ed6334e61171d9ab06bf7eb357f554e5fc7556ad7aab6d0e17038", size = 23706, upload-time = "2026-04-01T01:57:49.813Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/89/22/c2dd50c09bf679bd38173656cd4402d2511e563b33bc88f90009cf50613c/grpc_google_iam_v1-0.14.4-py3-none-any.whl", hash = "sha256:412facc320fcbd94034b4df3d557662051d4d8adfa86e0ddb4dca70a3f739964", size = 32675, upload-time = "2026-04-01T01:57:47.69Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.80.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-status"
|
||||
version = "1.80.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "googleapis-common-protos" },
|
||||
{ name = "grpcio" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/ed/105f619bdd00cb47a49aa2feea6232ea2bbb04199d52a22cc6a7d603b5cb/grpcio_status-1.80.0.tar.gz", hash = "sha256:df73802a4c89a3ea88aa2aff971e886fccce162bc2e6511408b3d67a144381cd", size = 13901, upload-time = "2026-03-30T08:54:34.784Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/76/80/58cd2dfc19a07d022abe44bde7c365627f6c7cb6f692ada6c65ca437d09a/grpcio_status-1.80.0-py3-none-any.whl", hash = "sha256:4b56990363af50dbf2c2ebb80f1967185c07d87aa25aa2bea45ddb75fc181dbe", size = 14638, upload-time = "2026-03-30T08:54:01.569Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.16.0"
|
||||
@@ -3149,6 +3369,11 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e6/05/3516cc7386b220d388aa0bd833308c677e94eceb82b2756dd95e06f6a13f/litellm-1.81.6-py3-none-any.whl", hash = "sha256:573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a", size = 12224343, upload-time = "2026-02-01T04:02:23.704Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
google = [
|
||||
{ name = "google-cloud-aiplatform" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "locket"
|
||||
version = "1.0.0"
|
||||
@@ -4217,7 +4442,7 @@ dependencies = [
|
||||
{ name = "fastapi" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "kubernetes" },
|
||||
{ name = "litellm" },
|
||||
{ name = "litellm", extra = ["google"] },
|
||||
{ name = "openai" },
|
||||
{ name = "prometheus-client" },
|
||||
{ name = "prometheus-fastapi-instrumentator" },
|
||||
@@ -4391,7 +4616,7 @@ requires-dist = [
|
||||
{ name = "fastapi", specifier = "==0.133.1" },
|
||||
{ name = "google-genai", specifier = "==1.52.0" },
|
||||
{ name = "kubernetes", specifier = ">=31.0.0" },
|
||||
{ name = "litellm", specifier = "==1.81.6" },
|
||||
{ name = "litellm", extras = ["google"], specifier = "==1.81.6" },
|
||||
{ name = "openai", specifier = "==2.14.0" },
|
||||
{ name = "prometheus-client", specifier = ">=0.21.1" },
|
||||
{ name = "prometheus-fastapi-instrumentator", specifier = "==7.1.0" },
|
||||
|
||||
@@ -82,7 +82,10 @@ ARG NODE_OPTIONS
|
||||
# SENTRY_AUTH_TOKEN is injected via BuildKit secret mount so it is never written
|
||||
# to any image layer, build cache, or registry manifest.
|
||||
# Use NODE_OPTIONS in the build command
|
||||
RUN --mount=type=secret,id=sentry_auth_token,env=SENTRY_AUTH_TOKEN \
|
||||
RUN --mount=type=secret,id=sentry_auth_token \
|
||||
if [ -f /run/secrets/sentry_auth_token ]; then \
|
||||
export SENTRY_AUTH_TOKEN="$(cat /run/secrets/sentry_auth_token)"; \
|
||||
fi && \
|
||||
NODE_OPTIONS="${NODE_OPTIONS}" npx next build
|
||||
|
||||
# Step 2. Production image, copy all the files and run next
|
||||
|
||||
@@ -271,6 +271,22 @@ export default function UserLibraryModal({
|
||||
/>
|
||||
</Section>
|
||||
|
||||
{/* The exact cap is controlled by the backend env var
|
||||
MAX_EMBEDDED_IMAGES_PER_FILE (default 500). This copy is
|
||||
deliberately vague so it doesn't drift if the limit is
|
||||
tuned per-deployment; the precise number is surfaced in
|
||||
the rejection error the server returns. */}
|
||||
<Section
|
||||
flexDirection="row"
|
||||
justifyContent="end"
|
||||
padding={0.5}
|
||||
height="fit"
|
||||
>
|
||||
<Text secondaryBody text03>
|
||||
PDFs with many embedded images may be rejected.
|
||||
</Text>
|
||||
</Section>
|
||||
|
||||
{isLoading ? (
|
||||
<Section padding={2} height="fit">
|
||||
<Text secondaryBody text03>
|
||||
|
||||
@@ -12,9 +12,9 @@ interface LLMOption {
|
||||
value: string;
|
||||
icon: ReturnType<typeof getModelIcon>;
|
||||
modelName: string;
|
||||
providerId: number;
|
||||
providerName: string;
|
||||
provider: string;
|
||||
providerDisplayName: string;
|
||||
supportsImageInput: boolean;
|
||||
vendor: string | null;
|
||||
}
|
||||
@@ -64,7 +64,7 @@ export default function LLMSelector({
|
||||
return;
|
||||
}
|
||||
|
||||
const key = `${provider.provider}:${modelConfiguration.name}`;
|
||||
const key = `${provider.id}:${modelConfiguration.name}`;
|
||||
if (seenKeys.has(key)) {
|
||||
return; // Skip exact duplicate
|
||||
}
|
||||
@@ -87,10 +87,9 @@ export default function LLMSelector({
|
||||
),
|
||||
icon: getModelIcon(provider.provider, modelConfiguration.name),
|
||||
modelName: modelConfiguration.name,
|
||||
providerId: provider.id,
|
||||
providerName: provider.name,
|
||||
provider: provider.provider,
|
||||
providerDisplayName:
|
||||
provider.provider_display_name || provider.provider,
|
||||
supportsImageInput,
|
||||
vendor: modelConfiguration.vendor || null,
|
||||
};
|
||||
@@ -108,33 +107,34 @@ export default function LLMSelector({
|
||||
requiresImageGeneration,
|
||||
]);
|
||||
|
||||
// Group options by provider using backend-provided display names
|
||||
// Group options by configured provider instance so multiple instances of the
|
||||
// same provider type (e.g., two Anthropic API keys) appear as separate groups
|
||||
// labeled with their user-given names.
|
||||
const groupedOptions = useMemo(() => {
|
||||
const groups = new Map<
|
||||
string,
|
||||
number,
|
||||
{ displayName: string; options: LLMOption[] }
|
||||
>();
|
||||
|
||||
llmOptions.forEach((option) => {
|
||||
const provider = option.provider.toLowerCase();
|
||||
if (!groups.has(provider)) {
|
||||
groups.set(provider, {
|
||||
displayName: option.providerDisplayName,
|
||||
if (!groups.has(option.providerId)) {
|
||||
groups.set(option.providerId, {
|
||||
displayName: option.providerName,
|
||||
options: [],
|
||||
});
|
||||
}
|
||||
groups.get(provider)!.options.push(option);
|
||||
groups.get(option.providerId)!.options.push(option);
|
||||
});
|
||||
|
||||
// Sort groups alphabetically by display name
|
||||
const sortedProviders = Array.from(groups.keys()).sort((a, b) =>
|
||||
const sortedProviderIds = Array.from(groups.keys()).sort((a, b) =>
|
||||
groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)
|
||||
);
|
||||
|
||||
return sortedProviders.map((provider) => {
|
||||
const group = groups.get(provider)!;
|
||||
return sortedProviderIds.map((providerId) => {
|
||||
const group = groups.get(providerId)!;
|
||||
return {
|
||||
provider,
|
||||
providerId,
|
||||
displayName: group.displayName,
|
||||
options: group.options,
|
||||
};
|
||||
@@ -179,7 +179,7 @@ export default function LLMSelector({
|
||||
)}
|
||||
{showGrouped
|
||||
? groupedOptions.map((group) => (
|
||||
<InputSelect.Group key={group.provider}>
|
||||
<InputSelect.Group key={group.providerId}>
|
||||
<InputSelect.Label>{group.displayName}</InputSelect.Label>
|
||||
{group.options.map((option) => (
|
||||
<InputSelect.Item
|
||||
|
||||
@@ -82,6 +82,7 @@ export interface LineItemProps
|
||||
|
||||
selected?: boolean;
|
||||
icon?: React.FunctionComponent<IconProps>;
|
||||
strokeIcon?: boolean;
|
||||
description?: string;
|
||||
rightChildren?: React.ReactNode;
|
||||
href?: string;
|
||||
@@ -154,6 +155,7 @@ export default function LineItem({
|
||||
skeleton,
|
||||
emphasized,
|
||||
icon: Icon,
|
||||
strokeIcon = true,
|
||||
description,
|
||||
children,
|
||||
rightChildren,
|
||||
@@ -245,7 +247,12 @@ export default function LineItem({
|
||||
!!(children && description) && "mt-0.5"
|
||||
)}
|
||||
>
|
||||
<Icon className={cn("h-[1rem] w-[1rem]", iconClassNames[variant])} />
|
||||
<Icon
|
||||
className={cn(
|
||||
"h-[1rem] w-[1rem]",
|
||||
strokeIcon && iconClassNames[variant]
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<Section alignItems="start" gap={0}>
|
||||
|
||||
@@ -103,6 +103,7 @@ export default function SwitchList({
|
||||
item.leading) as React.FunctionComponent<IconProps>)
|
||||
: undefined
|
||||
}
|
||||
strokeIcon={false}
|
||||
rightChildren={
|
||||
<Switch
|
||||
checked={item.isEnabled}
|
||||
|
||||
@@ -172,6 +172,7 @@ export default function ModelListContent({
|
||||
<LineItem
|
||||
muted
|
||||
icon={group.Icon}
|
||||
strokeIcon={false}
|
||||
rightChildren={
|
||||
open ? (
|
||||
<SvgChevronDown className="h-4 w-4 stroke-text-04 shrink-0" />
|
||||
|
||||
@@ -146,6 +146,7 @@ function SharedGroupResources({
|
||||
interactive={!dimmed}
|
||||
muted={dimmed}
|
||||
icon={getSourceMetadata(p.connector.source).icon}
|
||||
strokeIcon={false}
|
||||
rightChildren={
|
||||
p.groups.length > 0 || dimmed ? <SharedBadge /> : undefined
|
||||
}
|
||||
|
||||
@@ -186,6 +186,7 @@ export default function UserFilters({
|
||||
<LineItem
|
||||
key={role}
|
||||
icon={isSelected ? SvgCheck : roleIcon}
|
||||
strokeIcon={isSelected || role !== UserRole.SLACK_USER}
|
||||
selected={isSelected}
|
||||
emphasized={isSelected}
|
||||
onClick={() => toggleRole(role)}
|
||||
|
||||
@@ -131,6 +131,7 @@ function KnowledgeSidebar({
|
||||
<LineItem
|
||||
key={connectedSource.source}
|
||||
icon={sourceMetadata.icon}
|
||||
strokeIcon={false}
|
||||
onClick={() => onNavigateToSource(connectedSource.source)}
|
||||
selected={isActive}
|
||||
emphasized={isActive || isSelected || selectionCount > 0}
|
||||
@@ -720,6 +721,7 @@ const KnowledgeAddView = memo(function KnowledgeAddView({
|
||||
<LineItem
|
||||
key={connectedSource.source}
|
||||
icon={sourceMetadata.icon}
|
||||
strokeIcon={false}
|
||||
onClick={() => onNavigateToSource(connectedSource.source)}
|
||||
emphasized={isSelected || selectionCount > 0}
|
||||
aria-label={`knowledge-add-source-${connectedSource.source}`}
|
||||
|
||||
Reference in New Issue
Block a user