Compare commits

..

27 Commits

Author SHA1 Message Date
Jamison Lahman
87f8e5d3cc fix(security): enforce chat session ownership on stop endpoint (#10413) to release v3.2 (#10417)
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-21 10:30:09 -07:00
Jamison Lahman
5ef8a33e4b fix(files): harden authorization on chat file downloads (#10380) to release v3.2 (#10390) 2026-04-20 14:01:59 -07:00
Nikolas Garza
9d09261fd1 fix(gong): move call-details retry into checkpoint state instead of blocking (#10369) to release v3.2 (#10388) 2026-04-20 13:12:43 -07:00
Nikolas Garza
bf173654aa feat(connectors): convert Gong connector from poll to checkpointed (#10258) to release v3.2 (#10359) 2026-04-20 09:40:52 -07:00
Justin Tahara
de0575352b fix(metrics): Adding in hostname (#10335) 2026-04-17 13:39:48 -07:00
github-actions[bot]
d9185bcd2d fix(deps): install transitive vertexai dependency (#10328) to release v3.2 (#10332)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-17 12:17:25 -07:00
Justin Tahara
5c36d7bcd6 fix(web): Sentry Token Check (#10310) 2026-04-17 09:02:48 -07:00
github-actions[bot]
f81dc07afb fix(fe): LineItem can disable icon stroke (#10289) to release v3.2 (#10315)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-17 08:27:31 -07:00
Nikolas Garza
1215ef4576 chore(hotfix): cherry-pick 3 commits to release v3.2 (#10309)
Co-authored-by: Bo-Onyx <bo@onyx.app>
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2026-04-16 21:55:17 -07:00
Nikolas Garza
63d6f01895 chore(hotfix): cherry-pick 5 commits to release v3.2 (#10306)
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2026-04-16 21:54:41 -07:00
Justin Tahara
8fc2b3c3de fix(image): Cap Uploaded File Image Count (#10298) 2026-04-16 21:29:30 -07:00
github-actions[bot]
f5c48887f1 fix(llm-selector): show each provider instance as its own group (#10292) to release v3.2 (#10296)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-16 17:26:04 -07:00
github-actions[bot]
fe363bb62b fix: gmail datetime parsing on unexpected values (#10290) to release v3.2 (#10294)
Co-authored-by: Wenxi <wenxi@onyx.app>
2026-04-16 17:03:47 -07:00
github-actions[bot]
9862b0ef59 fix(logos): github logo displays correctly in dark mode (#10269) to release v3.2 (#10284)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-16 15:10:14 -07:00
github-actions[bot]
8a7aeb2c59 feat(anthropic): include Opus 4.7 in recommended models (#10273) to release v3.2 (#10280)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-16 14:33:10 -07:00
github-actions[bot]
648dcd1e47 feat(img): Editing User Uploaded Images (#10264) to release v3.2 (#10278)
Co-authored-by: Danelegend <43459662+Danelegend@users.noreply.github.com>
2026-04-16 13:46:23 -07:00
Nikolas Garza
f73796928c fix(chat): only header click selects preferred in multi-model panels (#10198) to release v3.2 (#10234) 2026-04-15 14:37:10 -07:00
github-actions[bot]
91101e8f2c fix(chat): keep model selector popover open until max models reached (#10203) to release v3.2 (#10216)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-15 14:34:46 -07:00
github-actions[bot]
44bb3ded44 fix(chat): fix fade gradient missing on last multi-model panel (#10199) to release v3.2 (#10214)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-15 14:34:38 -07:00
github-actions[bot]
493e3f23b8 fix(chat): disable hover/pointer states on multi-model panels during streaming (#10202) to release v3.2 (#10215)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-15 14:32:32 -07:00
github-actions[bot]
031c1118bd fix(chat): snap typewriter to full content on tab re-focus (#10226) to release v3.2 (#10231)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-15 14:27:55 -07:00
github-actions[bot]
b8b7702f28 fix(chat): hide incomplete citation links during streaming (#10224) to release v3.2 (#10232)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-15 14:27:44 -07:00
github-actions[bot]
ebb67aede9 fix(voice): send TTS text in POST body instead of query params (#10213) to release v3.2 (#10221)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-15 10:31:07 -07:00
github-actions[bot]
340cd520eb fix(ollama): always include model tag in display name (#10218) to release v3.2 (#10219)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-04-15 09:22:53 -07:00
github-actions[bot]
b626ad232c fix(fe): handle file attachment overflow (#10211) to release v3.2 (#10212)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-14 19:21:25 -07:00
github-actions[bot]
f1ee9c12c0 fix(chat): render inline citation chips in multi-model panels (#10196) to release v3.2 (#10201)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-14 16:07:23 -07:00
github-actions[bot]
378cbedaa1 fix(chat): eliminate long-lived DB session in multi-model worker threads (#10159) to release v3.2 (#10191)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2026-04-14 14:16:36 -07:00
94 changed files with 3810 additions and 1459 deletions

View File

@@ -10,6 +10,7 @@ from celery import bootsteps # type: ignore
from celery import Task
from celery.app import trace
from celery.exceptions import WorkerShutdown
from celery.signals import before_task_publish
from celery.signals import task_postrun
from celery.signals import task_prerun
from celery.states import READY_STATES
@@ -94,6 +95,17 @@ class TenantAwareTask(Task):
CURRENT_TENANT_ID_CONTEXTVAR.set(None)
@before_task_publish.connect
def on_before_task_publish(
    headers: dict[str, Any] | None = None,
    **kwargs: Any,  # noqa: ARG001
) -> None:
    """Record the publish wall-clock time in the message headers.

    Workers later read `enqueued_at` to compute queue wait time — the gap
    between when the task was published and when it started executing.
    """
    if headers is None:
        return
    headers["enqueued_at"] = time.time()
@task_prerun.connect
def on_task_prerun(
sender: Any | None = None, # noqa: ARG001

View File

@@ -16,6 +16,12 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME
from onyx.db.engine.sql_engine import SqlEngine
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
from onyx.server.metrics.metrics_server import start_metrics_server
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -36,6 +42,7 @@ def on_task_prerun(
**kwds: Any,
) -> None:
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
on_celery_task_prerun(task_id, task)
@signals.task_postrun.connect
@@ -50,6 +57,31 @@ def on_task_postrun(
**kwds: Any,
) -> None:
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
on_celery_task_postrun(task_id, task, state)
@signals.task_retry.connect
def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    """Forward a task-retry event to the metrics layer with the task id, when available."""
    request = getattr(sender, "request", None)
    on_celery_task_retry(getattr(request, "id", None), sender)
@signals.task_revoked.connect
def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
    """Forward a task-revoked event to the metrics layer.

    Celery's `task_revoked` signal does not pass a `task_id` kwarg — it
    provides `request` (plus `terminated`/`signum`/`expired`), so the previous
    `kwargs.get("task_id")` was always None. Read the id off the request
    object instead.
    """
    task_name = getattr(sender, "name", None) or str(sender)
    task_id = getattr(kwargs.get("request"), "id", None)
    on_celery_task_revoked(task_id, task_name)
@signals.task_rejected.connect
def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    """Forward a task-rejected event to the metrics layer.

    A rejected message never ran, so no task id exists; best effort is to
    recover the task name from the raw message headers, else "unknown".
    """
    msg = kwargs.get("message")
    name: str | None = None
    if msg is not None:
        name = (getattr(msg, "headers", None) or {}).get("task")
    if name is None:
        name = "unknown"
    on_celery_task_rejected(None, name)
@celeryd_init.connect
@@ -90,6 +122,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    # Expose the metrics endpoint for the light worker before delegating to
    # the shared base handler.
    start_metrics_server("light")
    app_base.on_worker_ready(sender, **kwargs)

View File

@@ -38,6 +38,12 @@ from onyx.redis.redis_connector_stop import RedisConnectorStop
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
from onyx.server.metrics.metrics_server import start_metrics_server
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
@@ -59,6 +65,7 @@ def on_task_prerun(
**kwds: Any,
) -> None:
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
on_celery_task_prerun(task_id, task)
@signals.task_postrun.connect
@@ -73,6 +80,31 @@ def on_task_postrun(
**kwds: Any,
) -> None:
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
on_celery_task_postrun(task_id, task, state)
@signals.task_retry.connect
def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    # sender is the retrying Task instance; its current request carries the id.
    task_id = getattr(getattr(sender, "request", None), "id", None)
    on_celery_task_retry(task_id, sender)
@signals.task_revoked.connect
def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
    """Forward a task-revoked event to the metrics layer.

    Celery's `task_revoked` signal does not pass a `task_id` kwarg — it
    provides `request` (plus `terminated`/`signum`/`expired`), so the previous
    `kwargs.get("task_id")` was always None. Read the id off the request
    object instead.
    """
    task_name = getattr(sender, "name", None) or str(sender)
    task_id = getattr(kwargs.get("request"), "id", None)
    on_celery_task_revoked(task_id, task_name)
@signals.task_rejected.connect
def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
    # A rejected message never ran, so there is no task id; best effort is to
    # recover the task name from the raw message headers, else "unknown".
    message = kwargs.get("message")
    task_name: str | None = None
    if message is not None:
        headers = getattr(message, "headers", None) or {}
        task_name = headers.get("task")
    if task_name is None:
        task_name = "unknown"
    on_celery_task_rejected(None, task_name)
@celeryd_init.connect
@@ -212,6 +244,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
@worker_ready.connect
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
    # Expose the metrics endpoint for the primary worker before delegating to
    # the shared base handler.
    start_metrics_server("primary")
    app_base.on_worker_ready(sender, **kwargs)

View File

@@ -59,6 +59,11 @@ from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.metrics.deletion_metrics import inc_deletion_blocked
from onyx.server.metrics.deletion_metrics import inc_deletion_completed
from onyx.server.metrics.deletion_metrics import inc_deletion_fence_reset
from onyx.server.metrics.deletion_metrics import inc_deletion_started
from onyx.server.metrics.deletion_metrics import observe_deletion_taskset_duration
from onyx.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
)
@@ -300,6 +305,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
recent_index_attempts
and recent_index_attempts[0].status == IndexingStatus.IN_PROGRESS
):
inc_deletion_blocked(tenant_id, "indexing")
raise TaskDependencyError(
"Connector deletion - Delayed (indexing in progress): "
f"cc_pair={cc_pair_id} "
@@ -307,11 +313,13 @@ def try_generate_document_cc_pair_cleanup_tasks(
)
if redis_connector.prune.fenced:
inc_deletion_blocked(tenant_id, "pruning")
raise TaskDependencyError(
f"Connector deletion - Delayed (pruning in progress): cc_pair={cc_pair_id}"
)
if redis_connector.permissions.fenced:
inc_deletion_blocked(tenant_id, "permissions")
raise TaskDependencyError(
f"Connector deletion - Delayed (permissions in progress): cc_pair={cc_pair_id}"
)
@@ -359,6 +367,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
# set this only after all tasks have been added
fence_payload.num_tasks = tasks_generated
redis_connector.delete.set_fence(fence_payload)
inc_deletion_started(tenant_id)
return tasks_generated
@@ -523,6 +532,12 @@ def monitor_connector_deletion_taskset(
num_docs_synced=fence_data.num_tasks,
)
duration = (
datetime.now(timezone.utc) - fence_data.submitted
).total_seconds()
observe_deletion_taskset_duration(tenant_id, "success", duration)
inc_deletion_completed(tenant_id, "success")
except Exception as e:
db_session.rollback()
stack_trace = traceback.format_exc()
@@ -541,6 +556,11 @@ def monitor_connector_deletion_taskset(
f"Connector deletion exceptioned: "
f"cc_pair={cc_pair_id} connector={connector_id_to_delete} credential={credential_id_to_delete}"
)
duration = (
datetime.now(timezone.utc) - fence_data.submitted
).total_seconds()
observe_deletion_taskset_duration(tenant_id, "failure", duration)
inc_deletion_completed(tenant_id, "failure")
raise e
task_logger.info(
@@ -717,5 +737,6 @@ def validate_connector_deletion_fence(
f"fence={fence_key}"
)
inc_deletion_fence_reset(tenant_id)
redis_connector.delete.reset()
return

View File

@@ -34,6 +34,7 @@ from onyx.db.index_attempt import mark_attempt_canceled
from onyx.db.index_attempt import mark_attempt_failed
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.redis.redis_connector import RedisConnector
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import SENTRY_DSN
@@ -467,6 +468,15 @@ def docfetching_proxy_task(
index_attempt.connector_credential_pair.connector.source.value
)
cc_pair = index_attempt.connector_credential_pair
on_index_attempt_status_change(
tenant_id=tenant_id,
source=result.connector_source,
cc_pair_id=cc_pair_id,
connector_name=cc_pair.connector.name or f"cc_pair_{cc_pair_id}",
status="in_progress",
)
while True:
sleep(5)

View File

@@ -105,6 +105,9 @@ from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_utils import is_fence
from onyx.server.metrics.connector_health_metrics import on_connector_error_state_change
from onyx.server.metrics.connector_health_metrics import on_connector_indexing_success
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
@@ -400,7 +403,6 @@ def check_indexing_completion(
tenant_id: str,
task: Task,
) -> None:
logger.info(
f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
)
@@ -521,13 +523,25 @@ def check_indexing_completion(
# Update CC pair status if successful
cc_pair = get_connector_credential_pair_from_id(
db_session, attempt.connector_credential_pair_id
db_session,
attempt.connector_credential_pair_id,
eager_load_connector=True,
)
if cc_pair is None:
raise RuntimeError(
f"CC pair {attempt.connector_credential_pair_id} not found in database"
)
source = cc_pair.connector.source.value
connector_name = cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
on_index_attempt_status_change(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
connector_name=connector_name,
status=attempt.status.value,
)
if attempt.status.is_successful():
# NOTE: we define the last successful index time as the time the last successful
# attempt finished. This is distinct from the poll_range_end of the last successful
@@ -548,10 +562,26 @@ def check_indexing_completion(
event=MilestoneRecordType.CONNECTOR_SUCCEEDED,
)
on_connector_indexing_success(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
connector_name=connector_name,
docs_indexed=attempt.new_docs_indexed or 0,
success_timestamp=attempt.time_updated.timestamp(),
)
# Clear repeated error state on success
if cc_pair.in_repeated_error_state:
cc_pair.in_repeated_error_state = False
db_session.commit()
on_connector_error_state_change(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
connector_name=connector_name,
in_error=False,
)
if attempt.status == IndexingStatus.SUCCESS:
logger.info(
@@ -848,6 +878,16 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
cc_pair_id=cc_pair_id,
in_repeated_error_state=True,
)
error_connector_name = (
cc_pair.connector.name or f"cc_pair_{cc_pair.id}"
)
on_connector_error_state_change(
tenant_id=tenant_id,
source=cc_pair.connector.source.value,
cc_pair_id=cc_pair_id,
connector_name=error_connector_name,
in_error=True,
)
# When entering repeated error state, also pause the connector
# to prevent continued indexing retry attempts burning through embedding credits.
# NOTE: only for Cloud, since most self-hosted users use self-hosted embedding

View File

@@ -4,8 +4,6 @@ from collections.abc import Callable
from typing import Any
from typing import Literal
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_utils import create_tool_call_failure_messages
from onyx.chat.citation_processor import CitationMapping
@@ -635,7 +633,6 @@ def run_llm_loop(
user_memory_context: UserMemoryContext | None,
llm: LLM,
token_counter: Callable[[str], int],
db_session: Session,
forced_tool_id: int | None = None,
user_identity: LLMUserIdentity | None = None,
chat_session_id: str | None = None,
@@ -1020,20 +1017,16 @@ def run_llm_loop(
persisted_memory_id: int | None = None
if user_memory_context and user_memory_context.user_id:
if tool_response.rich_response.index_to_replace is not None:
memory = update_memory_at_index(
persisted_memory_id = update_memory_at_index(
user_id=user_memory_context.user_id,
index=tool_response.rich_response.index_to_replace,
new_text=tool_response.rich_response.memory_text,
db_session=db_session,
)
persisted_memory_id = memory.id if memory else None
else:
memory = add_memory(
persisted_memory_id = add_memory(
user_id=user_memory_context.user_id,
memory_text=tool_response.rich_response.memory_text,
db_session=db_session,
)
persisted_memory_id = memory.id
operation: Literal["add", "update"] = (
"update"
if tool_response.rich_response.index_to_replace is not None

View File

@@ -826,6 +826,12 @@ def translate_history_to_llm_format(
base64_data = img_file.to_base64()
image_url = f"data:{image_type};base64,{base64_data}"
content_parts.append(
TextContentPart(
type="text",
text=f"[attached image — file_id: {img_file.file_id}]",
)
)
image_part = ImageContentPart(
type="image_url",
image_url=ImageUrlDetail(

View File

@@ -67,7 +67,6 @@ from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import reserve_message_id
from onyx.db.chat import reserve_multi_model_message_ids
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import HookPoint
from onyx.db.memory import get_memories
from onyx.db.models import ChatMessage
@@ -1006,93 +1005,86 @@ def _run_models(
model_llm = setup.llms[model_idx]
try:
# Each worker opens its own session — SQLAlchemy sessions are not thread-safe.
# Do NOT write to the outer db_session (or any shared DB state) from here;
# all DB writes in this thread must go through thread_db_session.
with get_session_with_current_tenant() as thread_db_session:
thread_tool_dict = construct_tools(
persona=setup.persona,
db_session=thread_db_session,
emitter=model_emitter,
user=user,
llm=model_llm,
search_tool_config=SearchToolConfig(
user_selected_filters=setup.new_msg_req.internal_search_filters,
project_id_filter=setup.search_params.project_id_filter,
persona_id_filter=setup.search_params.persona_id_filter,
bypass_acl=setup.bypass_acl,
slack_context=setup.slack_context,
enable_slack_search=_should_enable_slack_search(
setup.persona, setup.new_msg_req.internal_search_filters
),
# Each function opens short-lived DB sessions on demand.
# Do NOT pass a long-lived session here — it would hold a
# connection for the entire LLM loop (minutes), and cloud
# infrastructure may drop idle connections.
thread_tool_dict = construct_tools(
persona=setup.persona,
emitter=model_emitter,
user=user,
llm=model_llm,
search_tool_config=SearchToolConfig(
user_selected_filters=setup.new_msg_req.internal_search_filters,
project_id_filter=setup.search_params.project_id_filter,
persona_id_filter=setup.search_params.persona_id_filter,
bypass_acl=setup.bypass_acl,
slack_context=setup.slack_context,
enable_slack_search=_should_enable_slack_search(
setup.persona, setup.new_msg_req.internal_search_filters
),
custom_tool_config=CustomToolConfig(
chat_session_id=setup.chat_session.id,
message_id=setup.user_message.id,
additional_headers=setup.custom_tool_additional_headers,
mcp_headers=setup.mcp_headers,
),
file_reader_tool_config=FileReaderToolConfig(
user_file_ids=setup.available_files.user_file_ids,
chat_file_ids=setup.available_files.chat_file_ids,
),
allowed_tool_ids=setup.new_msg_req.allowed_tool_ids,
search_usage_forcing_setting=setup.search_params.search_usage,
),
custom_tool_config=CustomToolConfig(
chat_session_id=setup.chat_session.id,
message_id=setup.user_message.id,
additional_headers=setup.custom_tool_additional_headers,
mcp_headers=setup.mcp_headers,
),
file_reader_tool_config=FileReaderToolConfig(
user_file_ids=setup.available_files.user_file_ids,
chat_file_ids=setup.available_files.chat_file_ids,
),
allowed_tool_ids=setup.new_msg_req.allowed_tool_ids,
search_usage_forcing_setting=setup.search_params.search_usage,
)
model_tools = [
tool for tool_list in thread_tool_dict.values() for tool in tool_list
]
if setup.forced_tool_id and setup.forced_tool_id not in {
tool.id for tool in model_tools
}:
raise ValueError(
f"Forced tool {setup.forced_tool_id} not found in tools"
)
model_tools = [
tool
for tool_list in thread_tool_dict.values()
for tool in tool_list
]
if setup.forced_tool_id and setup.forced_tool_id not in {
tool.id for tool in model_tools
}:
raise ValueError(
f"Forced tool {setup.forced_tool_id} not found in tools"
)
# Per-thread copy: run_llm_loop mutates simple_chat_history in-place.
if n_models == 1 and setup.new_msg_req.deep_research:
if setup.chat_session.project_id:
raise RuntimeError(
"Deep research is not supported for projects"
)
run_deep_research_llm_loop(
emitter=model_emitter,
state_container=sc,
simple_chat_history=list(setup.simple_chat_history),
tools=model_tools,
custom_agent_prompt=setup.custom_agent_prompt,
llm=model_llm,
token_counter=get_llm_token_counter(model_llm),
db_session=thread_db_session,
skip_clarification=setup.skip_clarification,
user_identity=setup.user_identity,
chat_session_id=str(setup.chat_session.id),
all_injected_file_metadata=setup.all_injected_file_metadata,
)
else:
run_llm_loop(
emitter=model_emitter,
state_container=sc,
simple_chat_history=list(setup.simple_chat_history),
tools=model_tools,
custom_agent_prompt=setup.custom_agent_prompt,
context_files=setup.extracted_context_files,
persona=setup.persona,
user_memory_context=setup.user_memory_context,
llm=model_llm,
token_counter=get_llm_token_counter(model_llm),
db_session=thread_db_session,
forced_tool_id=setup.forced_tool_id,
user_identity=setup.user_identity,
chat_session_id=str(setup.chat_session.id),
chat_files=setup.chat_files_for_tools,
include_citations=setup.new_msg_req.include_citations,
all_injected_file_metadata=setup.all_injected_file_metadata,
inject_memories_in_prompt=user.use_memories,
)
# Per-thread copy: run_llm_loop mutates simple_chat_history in-place.
if n_models == 1 and setup.new_msg_req.deep_research:
if setup.chat_session.project_id:
raise RuntimeError("Deep research is not supported for projects")
run_deep_research_llm_loop(
emitter=model_emitter,
state_container=sc,
simple_chat_history=list(setup.simple_chat_history),
tools=model_tools,
custom_agent_prompt=setup.custom_agent_prompt,
llm=model_llm,
token_counter=get_llm_token_counter(model_llm),
skip_clarification=setup.skip_clarification,
user_identity=setup.user_identity,
chat_session_id=str(setup.chat_session.id),
all_injected_file_metadata=setup.all_injected_file_metadata,
)
else:
run_llm_loop(
emitter=model_emitter,
state_container=sc,
simple_chat_history=list(setup.simple_chat_history),
tools=model_tools,
custom_agent_prompt=setup.custom_agent_prompt,
context_files=setup.extracted_context_files,
persona=setup.persona,
user_memory_context=setup.user_memory_context,
llm=model_llm,
token_counter=get_llm_token_counter(model_llm),
forced_tool_id=setup.forced_tool_id,
user_identity=setup.user_identity,
chat_session_id=str(setup.chat_session.id),
chat_files=setup.chat_files_for_tools,
include_citations=setup.new_msg_req.include_citations,
all_injected_file_metadata=setup.all_injected_file_metadata,
inject_memories_in_prompt=user.use_memories,
)
model_succeeded[model_idx] = True

View File

@@ -840,6 +840,29 @@ MAX_FILE_SIZE_BYTES = int(
os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
) # 2GB in bytes
# Maximum embedded images allowed in a single file. PDFs (and other formats)
# with thousands of embedded images can OOM the user-file-processing worker
# because every image is decoded with PIL and then sent to the vision LLM.
# Enforced both at upload time (rejects the file) and during extraction
# (defense-in-depth: caps the number of images materialized).
#
# Clamped to >= 0; a negative env value would turn upload validation into
# always-fail and extraction into always-stop, which is never desired. 0
# disables image extraction entirely, which is a valid (if aggressive) setting.
MAX_EMBEDDED_IMAGES_PER_FILE = max(
0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500)
)
# Maximum embedded images allowed across all files in a single upload batch.
# Protects against the scenario where a user uploads many files that each
# fall under MAX_EMBEDDED_IMAGES_PER_FILE but aggregate to enough work
# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
# worker under concurrency or run up surprise latency/cost. Also clamped
# to >= 0.
MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000)
)
# Use document summary for contextual rag
USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
# Use chunk summary for contextual rag

View File

@@ -3,6 +3,7 @@ from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from email.utils import parsedate_to_datetime
from typing import Any
from typing import TypeVar
from urllib.parse import urljoin
@@ -10,7 +11,6 @@ from urllib.parse import urlparse
import requests
from dateutil.parser import parse
from dateutil.parser import ParserError
from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
from onyx.configs.constants import DocumentSource
@@ -56,18 +56,16 @@ def time_str_to_utc(datetime_str: str) -> datetime:
if fixed not in candidates:
candidates.append(fixed)
last_exception: Exception | None = None
for candidate in candidates:
try:
dt = parse(candidate)
return datetime_to_utc(dt)
except (ValueError, ParserError) as exc:
last_exception = exc
# dateutil is the primary; the stdlib RFC 2822 parser is a fallback for
# inputs dateutil rejects (e.g. headers concatenated without a CRLF —
# TZ may be dropped, datetime_to_utc then assumes UTC).
for parser in (parse, parsedate_to_datetime):
for candidate in candidates:
try:
return datetime_to_utc(parser(candidate))
except (TypeError, ValueError, OverflowError):
continue
if last_exception is not None:
raise last_exception
# Fallback in case parsing failed without raising (should not happen)
raise ValueError(f"Unable to parse datetime string: {datetime_str}")

View File

@@ -253,7 +253,17 @@ def thread_to_document(
updated_at_datetime = None
if updated_at:
updated_at_datetime = time_str_to_utc(updated_at)
try:
updated_at_datetime = time_str_to_utc(updated_at)
except (ValueError, OverflowError) as e:
# Old mailboxes contain RFC-violating Date headers. Drop the
# timestamp instead of aborting the indexing run.
logger.warning(
"Skipping unparseable Gmail Date header on thread %s: %r (%s)",
full_thread.get("id"),
updated_at,
e,
)
id = full_thread.get("id")
if not id:

View File

@@ -1,4 +1,5 @@
import base64
import copy
import time
from collections.abc import Generator
from datetime import datetime
@@ -8,43 +9,93 @@ from typing import Any
from typing import cast
import requests
from pydantic import BaseModel
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from onyx.configs.app_configs import GONG_CONNECTOR_START_TIME
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import TextSection
from onyx.utils.logger import setup_logger
logger = setup_logger()
class GongConnector(LoadConnector, PollConnector):
class GongConnectorCheckpoint(ConnectorCheckpoint):
    """Resumable pagination/retry state for the checkpointed Gong connector."""

    # Resolved workspace IDs to iterate through.
    # None means "not yet resolved" — first checkpoint call resolves them.
    # Inner None means "no workspace filter" (fetch all).
    workspace_ids: list[str | None] | None = None

    # Index into workspace_ids for current workspace
    workspace_index: int = 0

    # Gong API cursor for current workspace's transcript pagination
    cursor: str | None = None

    # Cached time range — computed once, reused across checkpoint calls
    time_range: tuple[str, str] | None = None

    # Transcripts whose call details were not yet available from /v2/calls/extensive
    # (Gong has a known race where transcript call IDs take time to propagate).
    # Keyed by call_id. Retried on subsequent checkpoint invocations.
    #
    # Invariant: all entries share one resolution session — they're stashed
    # together from a single page and share the attempt counter and retry
    # deadline. load_from_checkpoint only fetches a new page when this dict
    # is empty, so entries from different pages can't mix.
    #
    # NOTE(review): the mutable `{}` default is only safe if ConnectorCheckpoint
    # is a pydantic model (defaults copied per instance) — presumably it is,
    # given the BaseModel import in this module; confirm.
    pending_transcripts: dict[str, dict[str, Any]] = {}

    # Number of resolution attempts made for pending_transcripts so far.
    pending_call_details_attempts: int = 0

    # Unix timestamp before which we should not retry pending_transcripts.
    # Enforces exponential backoff independent of worker cadence — Gong's
    # transcript-ID propagation race can take tens of seconds to minutes,
    # longer than typical worker reinvocation intervals.
    pending_retry_after: float | None = None
class _TranscriptPage(BaseModel):
    """One page of transcripts from /v2/calls/transcript."""

    # Raw transcript records exactly as returned by the Gong API.
    transcripts: list[dict[str, Any]]
    # Cursor for the next page; None when this is the final page.
    next_cursor: str | None = None
class _CursorExpiredError(Exception):
    """Raised when Gong rejects a pagination cursor as expired.

    Gong pagination cursors TTL is ~1 hour from the first request in a
    pagination sequence, not from the last cursor fetch. Since checkpointed
    connector runs can pause between invocations, a resumed run may encounter
    an expired cursor and must restart the current workspace from scratch.
    See https://visioneers.gong.io/integrations-77/pagination-cursor-expires-after-1-hours-even-for-a-new-cursor-1382
    """
class GongConnector(CheckpointedConnector[GongConnectorCheckpoint]):
# Root of the Gong REST API; all request paths are appended to this.
BASE_URL = "https://api.gong.io"

# Max number of attempts to resolve missing call details across checkpoint
# invocations before giving up and emitting ConnectorFailure.
MAX_CALL_DETAILS_ATTEMPTS = 6

# Base delay (seconds) for exponential backoff between pending-transcript
# retry attempts. Delay before attempt N (N >= 2) is
# CALL_DETAILS_DELAY * 2^(N-2) seconds (30, 60, 120, 240, 480 = ~15.5min
# total) — matching the original blocking-retry schedule, but enforced via
# checkpoint deadline rather than in-call time.sleep.
# NOTE: this constant was previously assigned twice with the same value;
# the duplicate assignment has been removed.
CALL_DETAILS_DELAY = 30

# Gong API limit is 3 calls/sec — stay safely under it
MIN_REQUEST_INTERVAL = 0.5  # seconds between requests
def __init__(
    self,
    workspaces: list[str] | None = None,
    batch_size: int = INDEX_BATCH_SIZE,
    continue_on_fail: bool = CONTINUE_ON_CONNECTOR_FAILURE,
    hide_user_info: bool = False,
) -> None:
    """Configure the connector; no network I/O happens here.

    workspaces: workspace names or IDs to index; None means no filter.
    batch_size: target number of documents per emitted batch.
    continue_on_fail: log-and-skip per-item failures instead of raising.
    hide_user_info: replace speaker names with anonymized "User N" labels.
    """
    # Plain attribute copies — credentials arrive later via load_credentials.
    self.workspaces = workspaces
    self.batch_size: int = batch_size
    self.continue_on_fail = continue_on_fail
    self.hide_user_info = hide_user_info
    # Populated by load_credentials; None until credentials are provided.
    self.auth_token_basic: str | None = None
    # Timestamp of the most recent API request, used for client-side throttling.
    self._last_request_time: float = 0.0
@@ -98,67 +149,50 @@ class GongConnector(LoadConnector, PollConnector):
# Then the user input is treated as the name
return {**id_id_map, **name_id_map}
def _fetch_transcript_page(
    self,
    start_datetime: str | None,
    end_datetime: str | None,
    workspace_id: str | None,
    cursor: str | None,
) -> _TranscriptPage:
    """Fetch one page of transcripts from the Gong API.

    Builds the /v2/calls/transcript filter from the optional time range and
    workspace, and passes the pagination cursor when resuming mid-sequence.

    Returns an empty _TranscriptPage when Gong responds 404 (no calls in
    the requested range).

    Raises _CursorExpiredError if Gong reports the pagination cursor
    expired (TTL is ~1 hour from first request in the pagination sequence).
    """
    # NOTE: this span previously contained the superseded batch-generator
    # implementation (_get_transcript_batches) interleaved with this method;
    # only the checkpointed single-page fetcher is kept.
    body: dict[str, Any] = {"filter": {}}
    if start_datetime:
        body["filter"]["fromDateTime"] = start_datetime
    if end_datetime:
        body["filter"]["toDateTime"] = end_datetime
    if workspace_id:
        body["filter"]["workspaceId"] = workspace_id
    if cursor:
        body["cursor"] = cursor

    response = self._throttled_request(
        "POST", GongConnector.make_url("/v2/calls/transcript"), json=body
    )

    # If no calls in the range, return empty
    if response.status_code == 404:
        return _TranscriptPage(transcripts=[])

    if not response.ok:
        # Cursor expiration comes back as a 4xx with this error message —
        # detect it before raise_for_status so callers can restart the workspace.
        if cursor and "cursor has expired" in response.text.lower():
            raise _CursorExpiredError(response.text)
        logger.error(f"Error fetching transcripts: {response.text}")
        response.raise_for_status()

    data = response.json()
    return _TranscriptPage(
        transcripts=data.get("callTranscripts", []),
        next_cursor=data.get("records", {}).get("cursor"),
    )
def _get_call_details_by_ids(self, call_ids: list[str]) -> dict:
def _get_call_details_by_ids(self, call_ids: list[str]) -> dict[str, Any]:
body = {
"filter": {"callIds": call_ids},
"contentSelector": {"exposedFields": {"parties": True}},
@@ -196,186 +230,46 @@ class GongConnector(LoadConnector, PollConnector):
return id_mapping
def _resolve_workspace_ids(self) -> list[str | None]:
    """Resolve configured workspace names/IDs to actual workspace IDs.

    Returns a list of workspace IDs. If no workspaces are configured,
    returns [None] to indicate "fetch all workspaces".

    Raises ValueError if workspaces are configured but none resolve —
    we never silently widen scope to "fetch all" on misconfiguration,
    because that could ingest an entire Gong account by mistake.
    """
    # NOTE: this span previously interleaved the deleted blocking
    # _fetch_calls generator (with its in-call time.sleep retry loop) with
    # this method; only the checkpointed workspace resolver is kept.
    if not self.workspaces:
        return [None]

    workspace_map = self._get_workspace_id_map()
    resolved: list[str | None] = []
    for workspace in self.workspaces:
        workspace_id = workspace_map.get(workspace)
        if not workspace_id:
            # Log and keep going — a single bad name shouldn't block the
            # rest, but see the all-invalid guard below.
            logger.error(f"Invalid Gong workspace: {workspace}")
            continue
        resolved.append(workspace_id)

    if not resolved:
        raise ValueError(
            f"No valid Gong workspaces found — check workspace names/IDs in connector config. Configured: {self.workspaces}"
        )
    return resolved
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Build and install the HTTP Basic auth header for all Gong API calls.

    Expects `gong_access_key` and `gong_access_key_secret` keys in
    `credentials` (a missing key raises KeyError). Always returns None —
    the credentials need no transformation for storage.
    """
    combined = (
        f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
    )
    self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
        "utf-8"
    )
    # NOTE(review): this check is unreachable — b64encode(...).decode()
    # always returns a str, never None. Candidate for removal.
    if self.auth_token_basic is None:
        raise ConnectorMissingCredentialError("Gong")
    self._session.headers.update(
        {"Authorization": f"Basic {self.auth_token_basic}"}
    )
    return None
def load_from_state(self) -> GenerateDocumentsOutput:
    """Full (non-polling) load: fetch every call with no time filter."""
    return self._fetch_calls()
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
@staticmethod
def _compute_time_range(
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
) -> tuple[str, str]:
"""Compute the start/end datetime strings for the Gong API filter,
applying GONG_CONNECTOR_START_TIME and the 1-day offset."""
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
# if this env variable is set, don't start from a timestamp before the specified
# start time
# TODO: remove this once this is globally available
if GONG_CONNECTOR_START_TIME:
special_start_datetime = datetime.fromisoformat(GONG_CONNECTOR_START_TIME)
special_start_datetime = special_start_datetime.replace(tzinfo=timezone.utc)
@@ -394,11 +288,309 @@ class GongConnector(LoadConnector, PollConnector):
# so adding a 1 day buffer and fetching by default till current time
start_one_day_offset = start_datetime - timedelta(days=1)
start_time = start_one_day_offset.isoformat()
end_time = end_datetime.isoformat()
end_time = datetime.fromtimestamp(end, tz=timezone.utc).isoformat()
return start_time, end_time
logger.info(f"Fetching Gong calls between {start_time} and {end_time}")
return self._fetch_calls(start_time, end_time)
def _build_document(
    self,
    transcript: dict[str, Any],
    call_details: dict[str, Any],
) -> Document:
    """Assemble one Document from a transcript plus its resolved call details.

    The document body is the call description (when present) followed by one
    "<speaker>: <monolog>" paragraph per transcript segment. Speaker names
    come from the call's parties, or are anonymized to "User N" (numbered by
    order of first appearance) when hide_user_info is set.
    """
    call_id = transcript["callId"]
    meta = call_details["metaData"]
    started_at = meta["started"]
    title = meta["title"]
    logger.info(
        f"Indexing Gong call id {call_id} from {started_at.split('T', 1)[0]}: {title}"
    )

    parties = cast(list[dict] | None, call_details.get("parties"))
    if parties is None:
        logger.error(f"Couldn't get parties for Call ID: {call_id}")
        parties = []
    id_to_name_map = self._parse_parties(parties)

    # Separate cache in case the parties info is incomplete; also gives the
    # stable first-appearance numbering used for anonymized names.
    speaker_names: dict[str, str] = {}
    parts: list[str] = []

    purpose = meta["purpose"]
    if purpose:
        parts.append(f"Call Description: {purpose}\n\n")

    for segment in transcript["transcript"]:
        speaker_id = segment.get("speakerId", "")
        if speaker_id not in speaker_names:
            speaker_names[speaker_id] = (
                f"User {len(speaker_names) + 1}"
                if self.hide_user_info
                else id_to_name_map.get(speaker_id, "Unknown")
            )
        monolog = " ".join(
            sentence.get("text", "") for sentence in segment.get("sentences", {})
        )
        parts.append(f"{speaker_names[speaker_id]}: {monolog}\n\n")

    return Document(
        id=call_id,
        sections=[TextSection(link=meta["url"], text="".join(parts))],
        source=DocumentSource.GONG,
        # Should not ever be Untitled as a call cannot be made without a Title
        semantic_identifier=title or "Untitled",
        doc_updated_at=datetime.fromisoformat(started_at).astimezone(timezone.utc),
        metadata={"client": meta.get("system")},
    )
def _process_transcripts(
    self,
    transcripts: list[dict[str, Any]],
    checkpoint: GongConnectorCheckpoint,
) -> Generator[Document | ConnectorFailure, None, None]:
    """Resolve call details for one page of transcripts and yield Documents.

    Transcripts whose call details are not yet available (a known Gong
    propagation race) are stashed on `checkpoint.pending_transcripts` and
    retried on a later checkpoint invocation instead of blocking here.
    Transcripts with no callId at all produce a ConnectorFailure.
    """
    call_ids = cast(
        list[str],
        [t.get("callId") for t in transcripts if t.get("callId")],
    )
    details_by_id: dict[str, Any] = (
        self._get_call_details_by_ids(call_ids) if call_ids else {}
    )

    stashed: list[str] = []
    for transcript in transcripts:
        call_id = transcript.get("callId")
        if not call_id:
            logger.error(
                "Couldn't get call information for transcript missing callId"
            )
            yield ConnectorFailure(
                failed_document=DocumentFailure(document_id="unknown"),
                failure_message="Transcript missing callId",
            )
        elif call_id in details_by_id:
            yield self._build_document(transcript, details_by_id[call_id])
        else:
            # Details not available yet — defer to a future invocation.
            checkpoint.pending_transcripts[call_id] = transcript
            stashed.append(call_id)

    if stashed:
        logger.warning(
            f"Gong call details not yet available (race condition); "
            f"deferring to next checkpoint invocation: "
            f"call_ids={stashed}"
        )
        # The failed lookup above counts as attempt #1. The counter is
        # guaranteed to be 0 at this point because load_from_checkpoint only
        # calls this method once pending_transcripts has been fully drained.
        checkpoint.pending_call_details_attempts = 1
        checkpoint.pending_retry_after = time.time() + self._next_retry_delay(1)
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
    """Build and install the HTTP Basic auth header for all Gong API calls.

    Expects `gong_access_key` and `gong_access_key_secret` keys in
    `credentials` (a missing key raises KeyError before any state is
    mutated). Always returns None — the credentials need no transformation
    for storage.
    """
    combined = (
        f"{credentials['gong_access_key']}:{credentials['gong_access_key_secret']}"
    )
    # Gong uses HTTP Basic auth: base64("<access_key>:<secret>").
    # b64encode(...).decode() always yields a str, so the previous
    # `if self.auth_token_basic is None` guard was unreachable and has
    # been removed.
    self.auth_token_basic = base64.b64encode(combined.encode("utf-8")).decode(
        "utf-8"
    )
    self._session.headers.update(
        {"Authorization": f"Basic {self.auth_token_basic}"}
    )
    return None
def build_dummy_checkpoint(self) -> GongConnectorCheckpoint:
    """Return a fresh checkpoint with has_more=True so the first run proceeds."""
    return GongConnectorCheckpoint(has_more=True)
def validate_checkpoint_json(self, checkpoint_json: str) -> GongConnectorCheckpoint:
    """Deserialize and validate a checkpoint JSON payload via pydantic."""
    return GongConnectorCheckpoint.model_validate_json(checkpoint_json)
def load_from_checkpoint(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: GongConnectorCheckpoint,
) -> CheckpointOutput[GongConnectorCheckpoint]:
    """Yield Documents/ConnectorFailures for one slice of work, then return
    the updated checkpoint as the generator's return value.

    Each invocation does at most one of: resolve workspace IDs (first
    call), retry pending call-detail lookups, or fetch and process a
    single transcript page of the current workspace.
    """
    # Never mutate the caller's checkpoint — a partially-applied mutation
    # would corrupt resume state if this invocation failed midway.
    checkpoint = copy.deepcopy(checkpoint)

    # Step 1: Resolve workspace IDs on first call
    if checkpoint.workspace_ids is None:
        checkpoint.workspace_ids = self._resolve_workspace_ids()
        checkpoint.time_range = self._compute_time_range(start, end)
        checkpoint.has_more = True
        return checkpoint

    # Step 2: Resolve any transcripts stashed by a prior invocation whose
    # call details were missing due to Gong's propagation race. Worker
    # cadence between checkpoint calls provides the spacing between retry
    # attempts — no in-call sleep needed.
    if checkpoint.pending_transcripts:
        yield from self._resolve_pending_transcripts(checkpoint)
        # If pending still exists and we haven't exhausted attempts, defer
        # the rest of this invocation — _resolve_pending_transcripts set
        # has_more=True for us.
        if checkpoint.pending_transcripts:
            return checkpoint

    workspace_ids = checkpoint.workspace_ids

    # If we've exhausted all workspaces, we're done
    if checkpoint.workspace_index >= len(workspace_ids):
        checkpoint.has_more = False
        return checkpoint

    # Use cached time range, falling back to computation if not cached
    start_time, end_time = checkpoint.time_range or self._compute_time_range(
        start, end
    )
    logger.info(
        f"Fetching Gong calls between {start_time} and {end_time} "
        f"(workspace {checkpoint.workspace_index + 1}/{len(workspace_ids)})"
    )
    workspace_id = workspace_ids[checkpoint.workspace_index]

    # Step 3: Fetch one page of transcripts
    try:
        page = self._fetch_transcript_page(
            start_datetime=start_time,
            end_datetime=end_time,
            workspace_id=workspace_id,
            cursor=checkpoint.cursor,
        )
    except _CursorExpiredError:
        # Gong cursors TTL ~1h from first request in the sequence. If the
        # checkpoint paused long enough for the cursor to expire, restart
        # the current workspace from the beginning of the time range.
        # Document upserts are idempotent (keyed by call_id) so
        # reprocessing is safe.
        logger.warning(
            f"Gong pagination cursor expired for workspace "
            f"{checkpoint.workspace_index + 1}/{len(workspace_ids)}; "
            f"restarting workspace from beginning of time range."
        )
        checkpoint.cursor = None
        checkpoint.has_more = True
        return checkpoint

    # Step 4: Process transcripts into documents. Missing-details
    # transcripts get stashed into checkpoint.pending_transcripts.
    if page.transcripts:
        yield from self._process_transcripts(page.transcripts, checkpoint)

    # Step 5: Update cursor/workspace state
    if page.next_cursor:
        checkpoint.cursor = page.next_cursor
        checkpoint.has_more = True
    else:
        # Page sequence exhausted — advance to the next workspace.
        checkpoint.workspace_index += 1
        checkpoint.cursor = None
        checkpoint.has_more = checkpoint.workspace_index < len(workspace_ids)

    # If pending transcripts were stashed this invocation, we still have
    # work to do on a future invocation even if pagination is exhausted.
    if checkpoint.pending_transcripts:
        checkpoint.has_more = True

    return checkpoint
def _next_retry_delay(self, attempts_done: int) -> float:
    """Seconds to wait before attempt #(attempts_done + 1).

    Matches the original exponential backoff: 30, 60, 120, 240, 480.
    """
    # Double the base delay for each completed attempt.
    return self.CALL_DETAILS_DELAY * (2 ** (attempts_done - 1))
def _resolve_pending_transcripts(
    self,
    checkpoint: GongConnectorCheckpoint,
) -> Generator[Document | ConnectorFailure, None, None]:
    """Attempt to resolve transcripts whose call details were unavailable
    in a prior invocation. Mutates checkpoint in place: resolved transcripts
    are removed from pending_transcripts; on attempt exhaustion, emits
    ConnectorFailure for each unresolved call_id and clears pending state.

    If the backoff deadline hasn't elapsed yet, returns without issuing
    any API call so the next invocation can try again later.
    """
    if (
        checkpoint.pending_retry_after is not None
        and time.time() < checkpoint.pending_retry_after
    ):
        # Backoff still in effect — defer to a later invocation without
        # burning an attempt or an API call.
        checkpoint.has_more = True
        return

    pending_call_ids = list(checkpoint.pending_transcripts.keys())
    resolved = self._get_call_details_by_ids(pending_call_ids)

    for call_id, details in resolved.items():
        # pop with default guards against ids in the response that were
        # never pending (or already handled).
        transcript = checkpoint.pending_transcripts.pop(call_id, None)
        if transcript is None:
            continue
        yield self._build_document(transcript, details)

    # Everything resolved — clear retry state and let the caller proceed.
    if not checkpoint.pending_transcripts:
        checkpoint.pending_call_details_attempts = 0
        checkpoint.pending_retry_after = None
        return

    checkpoint.pending_call_details_attempts += 1
    logger.warning(
        f"Gong call details still missing after "
        f"{checkpoint.pending_call_details_attempts}/"
        f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
        f"missing_call_ids={list(checkpoint.pending_transcripts.keys())}"
    )
    if checkpoint.pending_call_details_attempts >= self.MAX_CALL_DETAILS_ATTEMPTS:
        logger.error(
            f"Giving up on missing Gong call details after "
            f"{self.MAX_CALL_DETAILS_ATTEMPTS} attempts: "
            f"missing_call_ids={list(checkpoint.pending_transcripts.keys())}"
        )
        # Emit one terminal failure per unresolved call, then reset all
        # pending/retry state so the run can continue paginating.
        for call_id in list(checkpoint.pending_transcripts.keys()):
            yield ConnectorFailure(
                failed_document=DocumentFailure(document_id=call_id),
                failure_message=(
                    f"Couldn't get call details after {self.MAX_CALL_DETAILS_ATTEMPTS} attempts for Call ID: {call_id}"
                ),
            )
        checkpoint.pending_transcripts = {}
        checkpoint.pending_call_details_attempts = 0
        checkpoint.pending_retry_after = None
        # has_more is recomputed by the workspace iteration that follows;
        # reset to False here so a stale True from a prior invocation
        # can't leak out via any future early-return path.
        checkpoint.has_more = False
    else:
        checkpoint.pending_retry_after = time.time() + self._next_retry_delay(
            checkpoint.pending_call_details_attempts
        )
        checkpoint.has_more = True
if __name__ == "__main__":
@@ -412,5 +604,13 @@ if __name__ == "__main__":
}
)
latest_docs = connector.load_from_state()
print(next(latest_docs))
checkpoint = connector.build_dummy_checkpoint()
while checkpoint.has_more:
doc_generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
try:
while True:
item = next(doc_generator)
print(item)
except StopIteration as e:
checkpoint = e.value
print(f"Checkpoint: {checkpoint}")

View File

@@ -750,31 +750,3 @@ def resync_cc_pair(
)
db_session.commit()
# ── Metrics query helpers ──────────────────────────────────────────────
def get_connector_health_for_metrics(
    db_session: Session,
) -> list:  # Returns list of Row tuples
    """Return connector health data for Prometheus metrics.

    Each row is (cc_pair_id, status, in_repeated_error_state,
    last_successful_index_time, name, source).
    """
    query = db_session.query(
        ConnectorCredentialPair.id,
        ConnectorCredentialPair.status,
        ConnectorCredentialPair.in_repeated_error_state,
        ConnectorCredentialPair.last_successful_index_time,
        ConnectorCredentialPair.name,
        Connector.source,
    ).join(
        Connector,
        ConnectorCredentialPair.connector_id == Connector.id,
    )
    return query.all()

View File

@@ -11,6 +11,7 @@ from sqlalchemy import event
from sqlalchemy import pool
from sqlalchemy.engine import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.exc import DBAPIError
from sqlalchemy.orm import Session
from onyx.configs.app_configs import DB_READONLY_PASSWORD
@@ -346,6 +347,25 @@ def get_session_with_shared_schema() -> Generator[Session, None, None]:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
def _safe_close_session(session: Session) -> None:
    """Best-effort Session.close() that tolerates a dropped DB connection.

    Long-running operations (e.g. multi-model LLM loops) can hold a session
    open long enough for cloud infrastructure (load-balancer timeouts,
    PgBouncer, idle-in-transaction timeouts, etc.) to drop the underlying
    connection. The implicit rollback inside close() then raises a DBAPIError
    subclass (OperationalError/InterfaceError). Since the caller's work is
    already complete, we log and continue — SQLAlchemy internally invalidates
    the connection so the pool can recycle it.
    """
    try:
        session.close()
        return
    except DBAPIError:
        pass
    logger.warning(
        "DB connection lost during session cleanup — the connection will be invalidated and recycled by the pool."
    )
@contextmanager
def get_session_with_tenant(*, tenant_id: str) -> Generator[Session, None, None]:
"""
@@ -358,8 +378,11 @@ def get_session_with_tenant(*, tenant_id: str) -> Generator[Session, None, None]
# no need to use the schema translation map for self-hosted + default schema
if not MULTI_TENANT and tenant_id == POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE:
with Session(bind=engine, expire_on_commit=False) as session:
session = Session(bind=engine, expire_on_commit=False)
try:
yield session
finally:
_safe_close_session(session)
return
# Create connection with schema translation to handle querying the right schema
@@ -367,8 +390,11 @@ def get_session_with_tenant(*, tenant_id: str) -> Generator[Session, None, None]
with engine.connect().execution_options(
schema_translate_map=schema_translate_map
) as connection:
with Session(bind=connection, expire_on_commit=False) as session:
session = Session(bind=connection, expire_on_commit=False)
try:
yield session
finally:
_safe_close_session(session)
def get_session() -> Generator[Session, None, None]:

View File

@@ -2,8 +2,6 @@ from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import NamedTuple
from typing import TYPE_CHECKING
from typing import TypeVarTuple
from sqlalchemy import and_
@@ -30,17 +28,6 @@ from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
if TYPE_CHECKING:
from onyx.configs.constants import DocumentSource
# from sqlalchemy.sql.selectable import Select
# Comment out unused imports that cause mypy errors
# from onyx.auth.models import UserRole
# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS
# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier
# from onyx.db.engine import async_query_for_dms
logger = setup_logger()
@@ -977,106 +964,3 @@ def get_index_attempt_errors_for_cc_pair(
stmt = stmt.offset(page * page_size).limit(page_size)
return list(db_session.scalars(stmt).all())
# ── Metrics query helpers ──────────────────────────────────────────────
class ActiveIndexAttemptMetric(NamedTuple):
    """Row returned by get_active_index_attempts_for_metrics."""

    # Non-terminal indexing status shared by the grouped attempts.
    status: IndexingStatus
    # Connector source; string annotation because DocumentSource is only
    # imported under TYPE_CHECKING in this module.
    source: "DocumentSource"
    cc_pair_id: int
    cc_pair_name: str | None
    # Number of non-terminal attempts in this (status, source, cc_pair) group.
    attempt_count: int
def get_active_index_attempts_for_metrics(
    db_session: Session,
) -> list[ActiveIndexAttemptMetric]:
    """Return non-terminal index attempts grouped by status, source, and connector.

    Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).
    """
    # Function-scope import — presumably deferred to avoid a circular
    # import at module load; TODO confirm against module import graph.
    from onyx.db.models import Connector

    # Anything whose status is not terminal counts as "active".
    terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]

    rows = (
        db_session.query(
            IndexAttempt.status,
            Connector.source,
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.name,
            func.count(),
        )
        .join(
            ConnectorCredentialPair,
            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
        )
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .filter(IndexAttempt.status.notin_(terminal_statuses))
        .group_by(
            IndexAttempt.status,
            Connector.source,
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.name,
        )
        .all()
    )
    return [ActiveIndexAttemptMetric(*row) for row in rows]
def get_failed_attempt_counts_by_cc_pair(
    db_session: Session,
    since: datetime | None = None,
) -> dict[int, int]:
    """Return {cc_pair_id: failed_attempt_count} for all connectors.

    When ``since`` is provided, only attempts created after that timestamp
    are counted. Defaults to the last 90 days to avoid unbounded historical
    aggregation.
    """
    cutoff = (
        since
        if since is not None
        else datetime.now(timezone.utc) - timedelta(days=90)
    )
    query = (
        db_session.query(
            IndexAttempt.connector_credential_pair_id,
            func.count(),
        )
        .filter(IndexAttempt.status == IndexingStatus.FAILED)
        .filter(IndexAttempt.time_created >= cutoff)
        .group_by(IndexAttempt.connector_credential_pair_id)
    )
    # Each row is a (cc_pair_id, count) pair, so dict() builds the mapping.
    return dict(query.all())
def get_docs_indexed_by_cc_pair(
    db_session: Session,
    since: datetime | None = None,
) -> dict[int, int]:
    """Return {cc_pair_id: total_new_docs_indexed} across successful attempts.

    Only counts attempts with status SUCCESS to avoid inflating counts with
    partial results from failed attempts. When ``since`` is provided, only
    attempts created after that timestamp are included (default: last 90 days).
    """
    cutoff = (
        since
        if since is not None
        else datetime.now(timezone.utc) - timedelta(days=90)
    )
    rows = (
        db_session.query(
            IndexAttempt.connector_credential_pair_id,
            func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),
        )
        .filter(IndexAttempt.status == IndexingStatus.SUCCESS)
        .filter(IndexAttempt.time_created >= cutoff)
        .group_by(IndexAttempt.connector_credential_pair_id)
        .all()
    )
    # Coerce the SQL SUM (may be Decimal/None) into a plain int.
    return {cc_id: int(total or 0) for cc_id, total in rows}

View File

@@ -5,6 +5,7 @@ from pydantic import ConfigDict
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.engine.sql_engine import get_session_with_current_tenant_if_none
from onyx.db.models import Memory
from onyx.db.models import User
@@ -83,47 +84,51 @@ def get_memories(user: User, db_session: Session) -> UserMemoryContext:
def add_memory(
    user_id: UUID,
    memory_text: str,
    db_session: Session | None = None,
) -> int:
    """Insert a new Memory row for the given user.

    If the user already has MAX_MEMORIES_PER_USER memories, the oldest
    one (lowest id) is deleted before inserting the new one.

    When db_session is None, a session for the current tenant is opened
    (and closed) internally via get_session_with_current_tenant_if_none.

    Returns the id of the newly created Memory row — an int rather than the
    ORM object, so the result remains usable after an internally-opened
    session has closed.
    """
    # NOTE: this span previously interleaved the old required-session
    # implementation with this one; only the session-optional version is kept.
    with get_session_with_current_tenant_if_none(db_session) as db_session:
        existing = db_session.scalars(
            select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())
        ).all()

        # Evict the oldest memory (lowest id) to stay at/under the cap.
        if len(existing) >= MAX_MEMORIES_PER_USER:
            db_session.delete(existing[0])

        memory = Memory(
            user_id=user_id,
            memory_text=memory_text,
        )
        db_session.add(memory)
        db_session.commit()
        return memory.id
def update_memory_at_index(
    user_id: UUID,
    index: int,
    new_text: str,
    db_session: Session | None = None,
) -> int | None:
    """Update the memory at the given 0-based index (ordered by id ASC, matching get_memories()).

    When db_session is None, a session for the current tenant is opened
    (and closed) internally via get_session_with_current_tenant_if_none.

    Returns the id of the updated Memory row, or None if the index is out of range.
    """
    # NOTE: this span previously interleaved the old required-session
    # implementation with this one; only the session-optional version is kept.
    with get_session_with_current_tenant_if_none(db_session) as db_session:
        memory_rows = db_session.scalars(
            select(Memory).where(Memory.user_id == user_id).order_by(Memory.id.asc())
        ).all()

        if index < 0 or index >= len(memory_rows):
            return None

        memory = memory_rows[index]
        memory.memory_text = new_text
        db_session.commit()
        return memory.id

View File

@@ -2,11 +2,17 @@ import datetime
from uuid import UUID
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.configs.constants import FileOrigin
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import FileRecord
from onyx.db.models import Persona
from onyx.db.models import Project__UserFile
from onyx.db.models import UserFile
@@ -108,13 +114,73 @@ def update_last_accessed_at_for_user_files(
db_session.commit()
def get_file_id_by_user_file_id(user_file_id: str, db_session: Session) -> str | None:
user_file = db_session.query(UserFile).filter(UserFile.id == user_file_id).first()
def get_file_id_by_user_file_id(
    user_file_id: str, user_id: UUID, db_session: Session
) -> str | None:
    """Resolve a UserFile row's storage file_id, scoped to its owner.

    Returns None when no UserFile with this id exists for `user_id`, so a
    caller can never resolve (and subsequently read) another user's file.
    """
    owned_row = (
        db_session.query(UserFile)
        .filter(UserFile.id == user_file_id, UserFile.user_id == user_id)
        .first()
    )
    return owned_row.file_id if owned_row else None
def user_can_access_chat_file(file_id: str, user_id: UUID, db_session: Session) -> bool:
    """Return True if `user_id` is allowed to read the raw `file_id` served by
    `GET /chat/file/{file_id}`. Access is granted when any of:

    - The `file_id` is the storage id of a `UserFile` owned by the user.
    - The `file_id` is a persona avatar (`Persona.uploaded_image_id`); avatars
      are visible to any authenticated user.
    - The `file_id` appears in a `ChatMessage.files` descriptor of a chat
      session the user owns or a session publicly shared via
      `ChatSessionSharedStatus.PUBLIC`.
    """
    # Cheapest check first: an EXISTS probe for a UserFile the caller owns,
    # avoiding loading any row data.
    owns_user_file = db_session.query(
        select(UserFile.id)
        .where(UserFile.file_id == file_id, UserFile.user_id == user_id)
        .exists()
    ).scalar()
    if owns_user_file:
        return True

    # TODO: move persona avatars to a dedicated endpoint (e.g.
    # /assistants/{id}/avatar) so this branch can be removed. /chat/file is
    # currently overloaded with multiple asset classes (user files, chat
    # attachments, tool outputs, avatars), forcing this access-check fan-out.
    #
    # Restrict the avatar path to CHAT_UPLOAD-origin files so an attacker
    # cannot bind another user's USER_FILE (or any other origin) to their
    # own persona and read it through this check.
    is_persona_avatar = db_session.query(
        select(Persona.id)
        .join(FileRecord, FileRecord.file_id == Persona.uploaded_image_id)
        .where(
            Persona.uploaded_image_id == file_id,
            FileRecord.file_origin == FileOrigin.CHAT_UPLOAD,
        )
        .exists()
    ).scalar()
    if is_persona_avatar:
        return True

    # Final fallback: the file appears as an attachment descriptor on a chat
    # message. The containment operator (`@>`) matches a files entry with this
    # id; the session must be owned by the user or publicly shared.
    chat_file_stmt = (
        select(ChatMessage.id)
        .join(ChatSession, ChatMessage.chat_session_id == ChatSession.id)
        .where(ChatMessage.files.op("@>")([{"id": file_id}]))
        .where(
            or_(
                ChatSession.user_id == user_id,
                ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC,
            )
        )
        .limit(1)
    )
    return db_session.execute(chat_file_stmt).first() is not None
def get_file_ids_by_user_file_ids(
user_file_ids: list[UUID], db_session: Session
) -> list[str]:

View File

@@ -7,8 +7,6 @@ import time
from collections.abc import Callable
from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import DynamicCitationProcessor
@@ -22,6 +20,7 @@ from onyx.chat.models import LlmStepResult
from onyx.chat.models import ToolCallSimple
from onyx.configs.chat_configs import SKIP_DEEP_RESEARCH_CLARIFICATION
from onyx.configs.constants import MessageType
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.tools import get_tool_by_name
from onyx.deep_research.dr_mock_tools import get_clarification_tool_definitions
from onyx.deep_research.dr_mock_tools import get_orchestrator_tools
@@ -184,6 +183,14 @@ def generate_final_report(
return has_reasoned
def _get_research_agent_tool_id() -> int:
    """Look up the research-agent tool's database id using a short-lived
    tenant-scoped session, so callers need not hold a session open."""
    with get_session_with_current_tenant() as session:
        tool = get_tool_by_name(
            tool_name=RESEARCH_AGENT_TOOL_NAME,
            db_session=session,
        )
        return tool.id
@log_function_time(print_only=True)
def run_deep_research_llm_loop(
emitter: Emitter,
@@ -193,7 +200,6 @@ def run_deep_research_llm_loop(
custom_agent_prompt: str | None, # noqa: ARG001
llm: LLM,
token_counter: Callable[[str], int],
db_session: Session,
skip_clarification: bool = False,
user_identity: LLMUserIdentity | None = None,
chat_session_id: str | None = None,
@@ -717,6 +723,7 @@ def run_deep_research_llm_loop(
simple_chat_history.append(assistant_with_tools)
# Now add TOOL_CALL_RESPONSE messages and tool call info for each result
research_agent_tool_id = _get_research_agent_tool_id()
for tab_index, report in enumerate(
research_results.intermediate_reports
):
@@ -737,10 +744,7 @@ def run_deep_research_llm_loop(
tab_index=tab_index,
tool_name=current_tool_call.tool_name,
tool_call_id=current_tool_call.tool_call_id,
tool_id=get_tool_by_name(
tool_name=RESEARCH_AGENT_TOOL_NAME,
db_session=db_session,
).id,
tool_id=research_agent_tool_id,
reasoning_tokens=llm_step_result.reasoning
or most_recent_reasoning,
tool_call_arguments=current_tool_call.tool_args,

View File

@@ -23,6 +23,7 @@ import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
from PIL import Image
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
from onyx.configs.constants import ONYX_METADATA_FILENAME
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.file_processing.file_types import OnyxFileExtensions
@@ -191,6 +192,56 @@ def read_text_file(
return file_content_raw, metadata
def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
    """Count the embedded images in a PDF, short-circuiting once `cap` is passed.

    Used to reject PDFs whose image count would OOM the user-file-processing
    worker during indexing. As soon as the running total exceeds `cap`, that
    total is returned as a sentinel (strictly > cap) so callers never iterate
    thousands of image objects just to report a number. Returns 0 when the PDF
    cannot be parsed at all.

    Owner-password-only PDFs (permission restrictions but no open password)
    decrypt with the empty string and are counted normally. Truly
    password-locked PDFs return 0 since we cannot inspect them; the caller
    should run its password-protected check first.

    The stream position is always restored before returning.
    """
    from pypdf import PdfReader

    # Remember where the caller left the stream; some file-likes don't
    # support tell(), in which case we neither rewind nor restore.
    try:
        original_offset = file.tell()
    except Exception:
        original_offset = None

    try:
        if original_offset is not None:
            file.seek(0)
        reader = PdfReader(file)
        if reader.is_encrypted:
            # Empty password handles owner-password-only PDFs; give up otherwise.
            try:
                if reader.decrypt("") == 0:
                    return 0
            except Exception:
                return 0
        total = 0
        for page in reader.pages:
            for _ in page.images:
                total += 1
                if total > cap:
                    # Sentinel: strictly greater than cap.
                    return total
        return total
    except Exception:
        logger.warning("Failed to count embedded images in PDF", exc_info=True)
        return 0
    finally:
        if original_offset is not None:
            try:
                file.seek(original_offset)
            except Exception:
                pass
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
"""
Extract text from a PDF. For embedded images, a more complex approach is needed.
@@ -254,8 +305,27 @@ def read_pdf_file(
)
if extract_images:
image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
images_processed = 0
cap_reached = False
for page_num, page in enumerate(pdf_reader.pages):
if cap_reached:
break
for image_file_object in page.images:
if images_processed >= image_cap:
# Defense-in-depth backstop. Upload-time validation
# should have rejected files exceeding the cap, but
# we also break here so a single oversized file can
# never pin a worker.
logger.warning(
"PDF embedded image cap reached (%d). "
"Skipping remaining images on page %d and beyond.",
image_cap,
page_num + 1,
)
cap_reached = True
break
image = Image.open(io.BytesIO(image_file_object.data))
img_byte_arr = io.BytesIO()
image.save(img_byte_arr, format=image.format)
@@ -268,6 +338,7 @@ def read_pdf_file(
image_callback(img_bytes, image_name)
else:
extracted_images.append((img_bytes, image_name))
images_processed += 1
return text, metadata, extracted_images

View File

@@ -1516,6 +1516,10 @@
"display_name": "Claude Opus 4.6",
"model_vendor": "anthropic"
},
"claude-opus-4-7": {
"display_name": "Claude Opus 4.7",
"model_vendor": "anthropic"
},
"claude-opus-4-5-20251101": {
"display_name": "Claude Opus 4.5",
"model_vendor": "anthropic",

View File

@@ -46,6 +46,15 @@ ANTHROPIC_REASONING_EFFORT_BUDGET: dict[ReasoningEffort, int] = {
ReasoningEffort.HIGH: 4096,
}
# Newer Anthropic models (Claude Opus 4.7+) use adaptive thinking with
# output_config.effort instead of thinking.type.enabled + budget_tokens.
ANTHROPIC_ADAPTIVE_REASONING_EFFORT: dict[ReasoningEffort, str] = {
ReasoningEffort.AUTO: "medium",
ReasoningEffort.LOW: "low",
ReasoningEffort.MEDIUM: "medium",
ReasoningEffort.HIGH: "high",
}
# Content part structures for multimodal messages
# The classes in this mirror the OpenAI Chat Completions message types and work well with routers like LiteLLM

View File

@@ -23,6 +23,7 @@ from onyx.llm.interfaces import ToolChoiceOptions
from onyx.llm.model_response import ModelResponse
from onyx.llm.model_response import ModelResponseStream
from onyx.llm.model_response import Usage
from onyx.llm.models import ANTHROPIC_ADAPTIVE_REASONING_EFFORT
from onyx.llm.models import ANTHROPIC_REASONING_EFFORT_BUDGET
from onyx.llm.models import OPENAI_REASONING_EFFORT
from onyx.llm.request_context import get_llm_mock_response
@@ -67,8 +68,13 @@ STANDARD_MAX_TOKENS_KWARG = "max_completion_tokens"
_VERTEX_ANTHROPIC_MODELS_REJECTING_OUTPUT_CONFIG = (
"claude-opus-4-5",
"claude-opus-4-6",
"claude-opus-4-7",
)
# Anthropic models that require the adaptive thinking API (thinking.type.adaptive
# + output_config.effort) instead of the legacy thinking.type.enabled + budget_tokens.
_ANTHROPIC_ADAPTIVE_THINKING_MODELS = ("claude-opus-4-7",)
class LLMTimeoutError(Exception):
"""
@@ -230,6 +236,14 @@ def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
)
def _anthropic_uses_adaptive_thinking(model_name: str) -> bool:
    """True when `model_name` matches a model that requires the adaptive
    thinking API (thinking.type.adaptive + output_config.effort) rather than
    the legacy thinking.type.enabled + budget_tokens config."""
    lowered = model_name.lower()
    for adaptive_candidate in _ANTHROPIC_ADAPTIVE_THINKING_MODELS:
        if adaptive_candidate in lowered:
            return True
    return False
class LitellmLLM(LLM):
"""Uses Litellm library to allow easy configuration to use a multitude of LLMs
See https://python.langchain.com/docs/integrations/chat/litellm"""
@@ -509,10 +523,6 @@ class LitellmLLM(LLM):
}
elif is_claude_model:
budget_tokens: int | None = ANTHROPIC_REASONING_EFFORT_BUDGET.get(
reasoning_effort
)
# Anthropic requires every assistant message with tool_use
# blocks to start with a thinking block that carries a
# cryptographic signature. We don't preserve those blocks
@@ -520,24 +530,35 @@ class LitellmLLM(LLM):
# contains tool-calling assistant messages. LiteLLM's
# modify_params workaround doesn't cover all providers
# (notably Bedrock).
can_enable_thinking = (
budget_tokens is not None
and not _prompt_contains_tool_call_history(prompt)
)
has_tool_call_history = _prompt_contains_tool_call_history(prompt)
if can_enable_thinking:
assert budget_tokens is not None # mypy
if max_tokens is not None:
# Anthropic has a weird rule where max token has to be at least as much as budget tokens if set
# and the minimum budget tokens is 1024
# Will note that overwriting a developer set max tokens is not ideal but is the best we can do for now
# It is better to allow the LLM to output more reasoning tokens even if it results in a fairly small tool
# call as compared to reducing the budget for reasoning.
max_tokens = max(budget_tokens + 1, max_tokens)
optional_kwargs["thinking"] = {
"type": "enabled",
"budget_tokens": budget_tokens,
}
if _anthropic_uses_adaptive_thinking(self.config.model_name):
# Newer Anthropic models (Claude Opus 4.7+) reject
# thinking.type.enabled — they require the adaptive
# thinking config with output_config.effort.
if not has_tool_call_history:
optional_kwargs["thinking"] = {"type": "adaptive"}
optional_kwargs["output_config"] = {
"effort": ANTHROPIC_ADAPTIVE_REASONING_EFFORT[
reasoning_effort
],
}
else:
budget_tokens: int | None = ANTHROPIC_REASONING_EFFORT_BUDGET.get(
reasoning_effort
)
if budget_tokens is not None and not has_tool_call_history:
if max_tokens is not None:
# Anthropic has a weird rule where max token has to be at least as much as budget tokens if set
# and the minimum budget tokens is 1024
# Will note that overwriting a developer set max tokens is not ideal but is the best we can do for now
# It is better to allow the LLM to output more reasoning tokens even if it results in a fairly small tool
# call as compared to reducing the budget for reasoning.
max_tokens = max(budget_tokens + 1, max_tokens)
optional_kwargs["thinking"] = {
"type": "enabled",
"budget_tokens": budget_tokens,
}
# LiteLLM just does some mapping like this anyway but is incomplete for Anthropic
optional_kwargs.pop("reasoning_effort", None)

View File

@@ -1,6 +1,6 @@
{
"version": "1.1",
"updated_at": "2026-03-05T00:00:00Z",
"version": "1.2",
"updated_at": "2026-04-16T00:00:00Z",
"providers": {
"openai": {
"default_model": { "name": "gpt-5.4" },
@@ -10,8 +10,12 @@
]
},
"anthropic": {
"default_model": "claude-opus-4-6",
"default_model": "claude-opus-4-7",
"additional_visible_models": [
{
"name": "claude-opus-4-7",
"display_name": "Claude Opus 4.7"
},
{
"name": "claude-opus-4-6",
"display_name": "Claude Opus 4.6"

View File

@@ -65,8 +65,9 @@ IMPORTANT: each call to this tool is independent. Variables from previous calls
GENERATE_IMAGE_GUIDANCE = """
## generate_image
NEVER use generate_image unless the user specifically requests an image.
For edits/variations of a previously generated image, pass `reference_image_file_ids` with
the `file_id` values returned by earlier `generate_image` tool results.
To edit, restyle, or vary an existing image, pass its file_id in `reference_image_file_ids`. \
File IDs come from `[attached image — file_id: <id>]` tags on user-attached images or from prior `generate_image` tool results — never invent one. \
Leave `reference_image_file_ids` unset for a fresh generation.
""".lstrip()
MEMORY_GUIDANCE = """

View File

@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session
from onyx.auth.permissions import require_permission
from onyx.background.celery.versioned_apps.client import app as celery_app
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -51,6 +53,9 @@ from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import Permission
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentMetadata
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
@@ -128,6 +133,49 @@ class DeleteFileResponse(BaseModel):
# =============================================================================
def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
"""True if either the filename or the content-type indicates a PDF.
Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
``Content-Type: application/octet-stream``), so we also fall back to
extension-based detection via ``mimetypes.guess_type`` on the filename.
"""
if content_type == "application/pdf":
return True
guessed, _ = mimetypes.guess_type(filename)
return guessed == "application/pdf"
def _check_pdf_image_caps(
    filename: str, content: bytes, content_type: str | None, batch_total: int
) -> int:
    """Enforce per-file and per-batch embedded-image caps for PDFs.

    Args:
        filename: Name reported by the client; used for PDF detection and
            for the user-facing error message.
        content: Raw file bytes.
        content_type: Client-supplied MIME type (may be spoofed).
        batch_total: Running image count across files already accepted in
            this upload batch.

    Returns the number of embedded images in this file (0 for non-PDFs) so
    callers can update their running batch total.

    Raises:
        OnyxError(INVALID_INPUT): if either the per-file or per-batch cap
            is exceeded.
    """
    if not _looks_like_pdf(filename, content_type):
        return 0

    file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
    batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
    # Short-circuit at the larger cap so we get a useful count for both checks.
    count = count_pdf_embedded_images(BytesIO(content), max(file_cap, batch_cap))
    if count > file_cap:
        # Bug fix: the message previously hard-coded the placeholder
        # '(unknown)' instead of the filename the caller supplies.
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            f"PDF '{filename}' contains too many embedded images "
            f"(more than {file_cap}). Try splitting the document into smaller files.",
        )
    if batch_total + count > batch_cap:
        raise OnyxError(
            OnyxErrorCode.INVALID_INPUT,
            f"Upload would exceed the {batch_cap}-image limit across all "
            f"files in this batch. Try uploading fewer image-heavy files at once.",
        )
    return count
def _sanitize_path(path: str) -> str:
"""Sanitize a file path, removing traversal attempts and normalizing.
@@ -356,6 +404,7 @@ async def upload_files(
uploaded_entries: list[LibraryEntryResponse] = []
total_size = 0
batch_image_total = 0
now = datetime.now(timezone.utc)
# Sanitize the base path
@@ -375,6 +424,14 @@ async def upload_files(
detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB",
)
# Reject PDFs with an unreasonable per-file or per-batch image count
batch_image_total += _check_pdf_image_caps(
filename=file.filename or "unnamed",
content=content,
content_type=file.content_type,
batch_total=batch_image_total,
)
# Validate cumulative storage (existing + this upload batch)
total_size += file_size
if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
@@ -473,6 +530,7 @@ async def upload_zip(
uploaded_entries: list[LibraryEntryResponse] = []
total_size = 0
batch_image_total = 0
# Extract zip contents into a subfolder named after the zip file
zip_name = api_sanitize_filename(file.filename or "upload")
@@ -511,6 +569,36 @@ async def upload_zip(
logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
continue
# Skip PDFs that would trip the per-file or per-batch image
# cap (would OOM the user-file-processing worker). Matches
# /upload behavior but uses skip-and-warn to stay consistent
# with the zip path's handling of oversized files.
zip_file_name = zip_info.filename.split("/")[-1]
zip_content_type, _ = mimetypes.guess_type(zip_file_name)
if zip_content_type == "application/pdf":
image_count = count_pdf_embedded_images(
BytesIO(file_content),
max(
MAX_EMBEDDED_IMAGES_PER_FILE,
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
),
)
if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
logger.warning(
"Skipping '%s' - exceeds %d per-file embedded-image cap",
zip_info.filename,
MAX_EMBEDDED_IMAGES_PER_FILE,
)
continue
if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
logger.warning(
"Skipping '%s' - would exceed %d per-batch embedded-image cap",
zip_info.filename,
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
)
continue
batch_image_total += image_count
total_size += file_size
# Validate cumulative storage

View File

@@ -9,7 +9,10 @@ from pydantic import ConfigDict
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
from onyx.db.llm import fetch_default_llm_model
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
@@ -190,6 +193,11 @@ def categorize_uploaded_files(
token_threshold_k * 1000 if token_threshold_k else None
) # 0 → None = no limit
# Running total of embedded images across PDFs in this batch. Once the
# aggregate cap is reached, subsequent PDFs in the same upload are
# rejected even if they'd individually fit under MAX_EMBEDDED_IMAGES_PER_FILE.
batch_image_total = 0
for upload in files:
try:
filename = get_safe_filename(upload)
@@ -252,6 +260,47 @@ def categorize_uploaded_files(
)
continue
# Reject PDFs with an unreasonable number of embedded images
# (either per-file or accumulated across this upload batch).
# A PDF with thousands of embedded images can OOM the
# user-file-processing celery worker because every image is
# decoded with PIL and then sent to the vision LLM.
if extension == ".pdf":
file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
# Use the larger of the two caps as the short-circuit
# threshold so we get a useful count for both checks.
# count_pdf_embedded_images restores the stream position.
count = count_pdf_embedded_images(
upload.file, max(file_cap, batch_cap)
)
if count > file_cap:
results.rejected.append(
RejectedFile(
filename=filename,
reason=(
f"PDF contains too many embedded images "
f"(more than {file_cap}). Try splitting "
f"the document into smaller files."
),
)
)
continue
if batch_image_total + count > batch_cap:
results.rejected.append(
RejectedFile(
filename=filename,
reason=(
f"Upload would exceed the "
f"{batch_cap}-image limit across all "
f"files in this batch. Try uploading "
f"fewer image-heavy files at once."
),
)
)
continue
batch_image_total += count
text_content = extract_file_text(
file=upload.file,
file_name=filename,

View File

@@ -183,6 +183,9 @@ def generate_ollama_display_name(model_name: str) -> str:
"qwen2.5:7b""Qwen 2.5 7B"
"mistral:latest""Mistral"
"deepseek-r1:14b""DeepSeek R1 14B"
"gemma4:e4b""Gemma 4 E4B"
"deepseek-v3.1:671b-cloud""DeepSeek V3.1 671B Cloud"
"qwen3-vl:235b-instruct-cloud""Qwen 3-vl 235B Instruct Cloud"
"""
# Split into base name and tag
if ":" in model_name:
@@ -209,13 +212,24 @@ def generate_ollama_display_name(model_name: str) -> str:
# Default: Title case with dashes converted to spaces
display_name = base.replace("-", " ").title()
# Process tag to extract size info (skip "latest")
# Process tag (skip "latest")
if tag and tag.lower() != "latest":
# Extract size like "7b", "70b", "14b"
size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])", tag)
# Check for size prefix like "7b", "70b", optionally followed by modifiers
size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])(-.+)?$", tag)
if size_match:
size = size_match.group(1).upper()
display_name = f"{display_name} {size}"
remainder = size_match.group(2)
if remainder:
# Format modifiers like "-cloud", "-instruct-cloud"
modifiers = " ".join(
p.title() for p in remainder.strip("-").split("-") if p
)
display_name = f"{display_name} {size} {modifiers}"
else:
display_name = f"{display_name} {size}"
else:
# Non-size tags like "e4b", "q4_0", "fp16", "cloud"
display_name = f"{display_name} {tag.upper()}"
return display_name

View File

@@ -1,13 +1,14 @@
import json
import secrets
from collections.abc import AsyncIterator
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Query
from fastapi import UploadFile
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.auth.permissions import require_permission
@@ -113,28 +114,47 @@ async def transcribe_audio(
) from exc
def _extract_provider_error(exc: Exception) -> str:
"""Extract a human-readable message from a provider exception.
Provider errors often embed JSON from upstream APIs (e.g. ElevenLabs).
This tries to parse a readable ``message`` field out of common JSON
error shapes; falls back to ``str(exc)`` if nothing better is found.
"""
raw = str(exc)
try:
# Many providers embed JSON after a prefix like "ElevenLabs TTS failed: {...}"
json_start = raw.find("{")
if json_start == -1:
return raw
parsed = json.loads(raw[json_start:])
# Shape: {"detail": {"message": "..."}} (ElevenLabs)
detail = parsed.get("detail", parsed)
if isinstance(detail, dict):
return detail.get("message") or detail.get("error") or raw
if isinstance(detail, str):
return detail
except (json.JSONDecodeError, AttributeError, TypeError):
pass
return raw
class SynthesizeRequest(BaseModel):
    """Request body for the text-to-speech /synthesize endpoint."""

    # Text to synthesize; must be non-empty.
    text: str = Field(..., min_length=1)
    # Optional provider voice id; the provider default is used when omitted.
    voice: str | None = None
    # Optional playback speed; must be within 0.5–2.0 when provided.
    speed: float | None = Field(default=None, ge=0.5, le=2.0)
@router.post("/synthesize")
async def synthesize_speech(
text: str | None = Query(
default=None, description="Text to synthesize", max_length=4096
),
voice: str | None = Query(default=None, description="Voice ID to use"),
speed: float | None = Query(
default=None, description="Playback speed (0.5-2.0)", ge=0.5, le=2.0
),
body: SynthesizeRequest,
user: User = Depends(require_permission(Permission.BASIC_ACCESS)),
) -> StreamingResponse:
"""
Synthesize text to speech using the default TTS provider.
Accepts parameters via query string for streaming compatibility.
"""
logger.info(
f"TTS request: text length={len(text) if text else 0}, voice={voice}, speed={speed}"
)
if not text:
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Text is required")
"""Synthesize text to speech using the default TTS provider."""
text = body.text
voice = body.voice
speed = body.speed
logger.info(f"TTS request: text length={len(text)}, voice={voice}, speed={speed}")
# Use short-lived session to fetch provider config, then release connection
# before starting the long-running streaming response
@@ -177,31 +197,36 @@ async def synthesize_speech(
logger.error(f"Failed to get voice provider: {exc}")
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc
# Session is now closed - streaming response won't hold DB connection
# Pull the first chunk before returning the StreamingResponse. If the
# provider rejects the request (e.g. text too long), the error surfaces
# as a proper HTTP error instead of a broken audio stream.
stream_iter = provider.synthesize_stream(
text=text, voice=final_voice, speed=final_speed
)
try:
first_chunk = await stream_iter.__anext__()
except StopAsyncIteration:
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, "TTS provider returned no audio")
except Exception as exc:
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, _extract_provider_error(exc)
) from exc
async def audio_stream() -> AsyncIterator[bytes]:
try:
chunk_count = 0
async for chunk in provider.synthesize_stream(
text=text, voice=final_voice, speed=final_speed
):
chunk_count += 1
yield chunk
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
except NotImplementedError as exc:
logger.error(f"TTS not implemented: {exc}")
raise
except Exception as exc:
logger.error(f"Synthesis failed: {exc}")
raise
yield first_chunk
chunk_count = 1
async for chunk in stream_iter:
chunk_count += 1
yield chunk
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
return StreamingResponse(
audio_stream(),
media_type="audio/mpeg",
headers={
"Content-Disposition": "inline; filename=speech.mp3",
# Allow streaming by not setting content-length
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no", # Disable nginx buffering
"X-Accel-Buffering": "no",
},
)

View File

@@ -1,7 +1,8 @@
"""Generic Celery task lifecycle Prometheus metrics.
Provides signal handlers that track task started/completed/failed counts,
active task gauge, task duration histograms, and retry/reject/revoke counts.
active task gauge, task duration histograms, queue wait time histograms,
and retry/reject/revoke counts.
These fire for ALL tasks on the worker — no per-connector enrichment
(see indexing_task_metrics.py for that).
@@ -71,6 +72,32 @@ TASK_REJECTED = Counter(
["task_name"],
)
# Queue-wait histogram. Buckets span 100ms up to 10 days: the low end covers
# interactive queues, the high end backlogged background queues.
TASK_QUEUE_WAIT = Histogram(
    "onyx_celery_task_queue_wait_seconds",
    "Time a Celery task spent waiting in the queue before execution started",
    ["task_name", "queue"],
    buckets=[
        0.1,
        0.5,
        1,
        5,
        30,
        60,
        300,
        600,
        1800,
        3600,
        7200,
        14400,
        28800,
        43200,
        86400,
        172800,
        432000,
        864000,
    ],
)
# task_id → (monotonic start time, metric labels)
_task_start_times: dict[str, tuple[float, dict[str, str]]] = {}
@@ -133,6 +160,13 @@ def on_celery_task_prerun(
with _task_start_times_lock:
_evict_stale_start_times()
_task_start_times[task_id] = (time.monotonic(), labels)
headers = getattr(task.request, "headers", None) or {}
enqueued_at = headers.get("enqueued_at")
if isinstance(enqueued_at, (int, float)):
TASK_QUEUE_WAIT.labels(**labels).observe(
max(0.0, time.time() - enqueued_at)
)
except Exception:
logger.debug("Failed to record celery task prerun metrics", exc_info=True)

View File

@@ -0,0 +1,123 @@
"""Prometheus metrics for connector health and index attempts.
Emitted by docfetching and docprocessing workers when connector or
index attempt state changes. All functions silently catch exceptions
to avoid disrupting the caller's business logic.
Gauge metrics (error state, last success timestamp) are per-process.
With multiple worker pods, use max() aggregation in PromQL to get the
correct value across instances, e.g.:
max by (cc_pair_id, connector_name) (onyx_connector_in_error_state)
Unlike the per-task counters in indexing_task_metrics.py, these metrics
include connector_name because their cardinality is bounded by the number
of connectors (one series per connector), not by the number of task
executions.
"""
from prometheus_client import Counter
from prometheus_client import Gauge
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Label set shared by every connector-level metric below. Cardinality is
# bounded by the number of configured connectors (one series per connector).
_CONNECTOR_LABELS = ["tenant_id", "source", "cc_pair_id", "connector_name"]

# --- Index attempt lifecycle ---

INDEX_ATTEMPT_STATUS = Counter(
    "onyx_index_attempt_transitions_total",
    "Index attempt status transitions",
    [*_CONNECTOR_LABELS, "status"],
)

# --- Connector health ---

# Per-process gauge; aggregate with max() across instances in PromQL.
CONNECTOR_IN_ERROR_STATE = Gauge(
    "onyx_connector_in_error_state",
    "Whether the connector is in a repeated error state (1=yes, 0=no)",
    _CONNECTOR_LABELS,
)

# Per-process gauge; aggregate with max() across instances in PromQL.
CONNECTOR_LAST_SUCCESS_TIMESTAMP = Gauge(
    "onyx_connector_last_success_timestamp_seconds",
    "Unix timestamp of last successful indexing for this connector",
    _CONNECTOR_LABELS,
)

CONNECTOR_DOCS_INDEXED = Counter(
    "onyx_connector_docs_indexed_total",
    "Total documents indexed per connector (monotonic)",
    _CONNECTOR_LABELS,
)

CONNECTOR_INDEXING_ERRORS = Counter(
    "onyx_connector_indexing_errors_total",
    "Total failed index attempts per connector (monotonic)",
    _CONNECTOR_LABELS,
)
def on_index_attempt_status_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    connector_name: str,
    status: str,
) -> None:
    """Record an index attempt status transition; never raises."""
    try:
        shared_labels = dict(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
            connector_name=connector_name,
        )
        INDEX_ATTEMPT_STATUS.labels(status=status, **shared_labels).inc()
        # A failed attempt also bumps the monotonic per-connector error counter.
        if status == "failed":
            CONNECTOR_INDEXING_ERRORS.labels(**shared_labels).inc()
    except Exception:
        logger.debug("Failed to record index attempt status metric", exc_info=True)
def on_connector_error_state_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    connector_name: str,
    in_error: bool,
) -> None:
    """Record a change of the connector's in_repeated_error_state flag; never raises."""
    try:
        error_gauge = CONNECTOR_IN_ERROR_STATE.labels(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
            connector_name=connector_name,
        )
        error_gauge.set(1.0 if in_error else 0.0)
    except Exception:
        logger.debug("Failed to record connector error state metric", exc_info=True)
def on_connector_indexing_success(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    connector_name: str,
    docs_indexed: int,
    success_timestamp: float,
) -> None:
    """Record a successful indexing run (timestamp + doc count); never raises."""
    try:
        shared_labels = dict(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
            connector_name=connector_name,
        )
        CONNECTOR_LAST_SUCCESS_TIMESTAMP.labels(**shared_labels).set(success_timestamp)
        # Counters must only move forward; skip empty runs.
        if docs_indexed > 0:
            CONNECTOR_DOCS_INDEXED.labels(**shared_labels).inc(docs_indexed)
    except Exception:
        logger.debug("Failed to record connector success metric", exc_info=True)

View File

@@ -0,0 +1,104 @@
"""Connector-deletion-specific Prometheus metrics.
Tracks the deletion lifecycle:
1. Deletions started (taskset generated)
2. Deletions completed (success or failure)
3. Taskset duration (from taskset generation to completion or failure).
Note: this measures the most recent taskset execution, NOT wall-clock
time since the user triggered the deletion. When deletion is blocked by
indexing/pruning/permissions, the fence is cleared and a fresh taskset
is generated on each retry, resetting this timer.
4. Deletion blocked by dependencies (indexing, pruning, permissions, etc.)
5. Fence resets (stuck deletion recovery)
All metrics are labeled by tenant_id. cc_pair_id is intentionally excluded
to avoid unbounded cardinality.
Usage:
from onyx.server.metrics.deletion_metrics import (
inc_deletion_started,
inc_deletion_completed,
observe_deletion_taskset_duration,
inc_deletion_blocked,
inc_deletion_fence_reset,
)
"""
from prometheus_client import Counter
from prometheus_client import Histogram
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Counter: one increment each time a deletion taskset is generated for a
# connector. Labeled by tenant only — cc_pair_id is intentionally excluded
# (see module docstring) to keep cardinality bounded.
DELETION_STARTED = Counter(
    "onyx_deletion_started_total",
    "Connector deletions initiated (taskset generated)",
    ["tenant_id"],
)
# Counter: deletions that reached a terminal state; `outcome` distinguishes
# success from failure.
DELETION_COMPLETED = Counter(
    "onyx_deletion_completed_total",
    "Connector deletions completed",
    ["tenant_id", "outcome"],
)
# Histogram: wall time of the most recent taskset, from generation to a
# terminal state. Buckets span 10s to 6h (21600s).
DELETION_TASKSET_DURATION = Histogram(
    "onyx_deletion_taskset_duration_seconds",
    "Duration of a connector deletion taskset, from taskset generation "
    "to completion or failure. Does not include time spent blocked on "
    "indexing/pruning/permissions before the taskset was generated.",
    ["tenant_id", "outcome"],
    buckets=[10, 30, 60, 120, 300, 600, 1800, 3600, 7200, 21600],
)
# Counter: deletion attempts deferred because another job held the cc pair;
# `blocker` names the dependency (indexing, pruning, permissions, ...).
DELETION_BLOCKED = Counter(
    "onyx_deletion_blocked_total",
    "Times deletion was blocked by a dependency",
    ["tenant_id", "blocker"],
)
# Counter: stuck-deletion recovery events — the fence was cleared because the
# expected celery tasks were missing.
DELETION_FENCE_RESET = Counter(
    "onyx_deletion_fence_reset_total",
    "Deletion fences reset due to missing celery tasks",
    ["tenant_id"],
)
def inc_deletion_started(tenant_id: str) -> None:
    """Count a connector deletion whose taskset was just generated.

    Best-effort: metric errors are logged at debug level, never raised.
    """
    try:
        started_counter = DELETION_STARTED.labels(tenant_id=tenant_id)
        started_counter.inc()
    except Exception:
        logger.debug("Failed to record deletion started", exc_info=True)
def inc_deletion_completed(tenant_id: str, outcome: str) -> None:
    """Count a finished connector deletion, labeled by its outcome.

    Best-effort: metric errors are logged at debug level, never raised.
    """
    try:
        completed_counter = DELETION_COMPLETED.labels(
            tenant_id=tenant_id, outcome=outcome
        )
        completed_counter.inc()
    except Exception:
        logger.debug("Failed to record deletion completed", exc_info=True)
def observe_deletion_taskset_duration(
    tenant_id: str, outcome: str, duration_seconds: float
) -> None:
    """Record how long the most recent deletion taskset ran, by outcome.

    Best-effort: metric errors are logged at debug level, never raised.
    """
    try:
        duration_histogram = DELETION_TASKSET_DURATION.labels(
            tenant_id=tenant_id, outcome=outcome
        )
        duration_histogram.observe(duration_seconds)
    except Exception:
        logger.debug("Failed to record deletion taskset duration", exc_info=True)
def inc_deletion_blocked(tenant_id: str, blocker: str) -> None:
    """Count a deletion attempt deferred by a dependency (`blocker`).

    Best-effort: metric errors are logged at debug level, never raised.
    """
    try:
        blocked_counter = DELETION_BLOCKED.labels(
            tenant_id=tenant_id, blocker=blocker
        )
        blocked_counter.inc()
    except Exception:
        logger.debug("Failed to record deletion blocked", exc_info=True)
def inc_deletion_fence_reset(tenant_id: str) -> None:
    """Count a deletion fence reset (stuck-deletion recovery).

    Best-effort: metric errors are logged at debug level, never raised.
    """
    try:
        fence_reset_counter = DELETION_FENCE_RESET.labels(tenant_id=tenant_id)
        fence_reset_counter.inc()
    except Exception:
        logger.debug("Failed to record deletion fence reset", exc_info=True)

View File

@@ -1,25 +1,30 @@
"""Prometheus collectors for Celery queue depths and indexing pipeline state.
"""Prometheus collectors for Celery queue depths and infrastructure health.
These collectors query Redis and Postgres at scrape time (the Collector pattern),
These collectors query Redis at scrape time (the Collector pattern),
so metrics are always fresh when Prometheus scrapes /metrics. They run inside the
monitoring celery worker which already has Redis and DB access.
monitoring celery worker which already has Redis access.
To avoid hammering Redis/Postgres on every 15s scrape, results are cached with
To avoid hammering Redis on every 15s scrape, results are cached with
a configurable TTL (default 30s). This means metrics may be up to TTL seconds
stale, which is fine for monitoring dashboards.
Note: connector health and index attempt metrics are push-based (emitted by
workers at state-change time) and live in connector_health_metrics.py.
"""
from __future__ import annotations
import concurrent.futures
import json
import threading
import time
from datetime import datetime
from datetime import timezone
from typing import Any
from prometheus_client.core import GaugeMetricFamily
from prometheus_client.registry import Collector
from redis import Redis
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.configs.constants import OnyxCeleryQueues
@@ -31,6 +36,11 @@ logger = setup_logger()
# the previous result without re-querying Redis/Postgres.
_DEFAULT_CACHE_TTL = 30.0
# Maximum time (seconds) a single _collect_fresh() call may take before
# the collector gives up and returns stale/empty results. Prevents the
# /metrics endpoint from hanging indefinitely when a DB or Redis query stalls.
_DEFAULT_COLLECT_TIMEOUT = 120.0
_QUEUE_LABEL_MAP: dict[str, str] = {
OnyxCeleryQueues.PRIMARY: "primary",
OnyxCeleryQueues.DOCPROCESSING: "docprocessing",
@@ -62,18 +72,32 @@ _UNACKED_QUEUES: list[str] = [
class _CachedCollector(Collector):
"""Base collector with TTL-based caching.
"""Base collector with TTL-based caching and timeout protection.
Subclasses implement ``_collect_fresh()`` to query the actual data source.
The base ``collect()`` returns cached results if the TTL hasn't expired,
avoiding repeated queries when Prometheus scrapes frequently.
A per-collection timeout prevents a slow DB or Redis query from blocking
the /metrics endpoint indefinitely. If _collect_fresh() exceeds the
timeout, stale cached results are returned instead.
"""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
def __init__(
self,
cache_ttl: float = _DEFAULT_CACHE_TTL,
collect_timeout: float = _DEFAULT_COLLECT_TIMEOUT,
) -> None:
self._cache_ttl = cache_ttl
self._collect_timeout = collect_timeout
self._cached_result: list[GaugeMetricFamily] | None = None
self._last_collect_time: float = 0.0
self._lock = threading.Lock()
self._executor = concurrent.futures.ThreadPoolExecutor(
max_workers=1,
thread_name_prefix=type(self).__name__,
)
self._inflight: concurrent.futures.Future | None = None
def collect(self) -> list[GaugeMetricFamily]:
with self._lock:
@@ -84,12 +108,28 @@ class _CachedCollector(Collector):
):
return self._cached_result
# If a previous _collect_fresh() is still running, wait on it
# rather than queuing another. This prevents unbounded task
# accumulation in the executor during extended DB outages.
if self._inflight is not None and not self._inflight.done():
future = self._inflight
else:
future = self._executor.submit(self._collect_fresh)
self._inflight = future
try:
result = self._collect_fresh()
result = future.result(timeout=self._collect_timeout)
self._inflight = None
self._cached_result = result
self._last_collect_time = now
return result
except concurrent.futures.TimeoutError:
logger.warning(
f"{type(self).__name__}._collect_fresh() timed out after {self._collect_timeout}s, returning stale cache"
)
return self._cached_result if self._cached_result is not None else []
except Exception:
self._inflight = None
logger.exception(f"Error in {type(self).__name__}.collect()")
# Return stale cache on error rather than nothing — avoids
# metrics disappearing during transient failures.
@@ -117,8 +157,6 @@ class QueueDepthCollector(_CachedCollector):
if self._celery_app is None:
return []
from onyx.background.celery.celery_redis import celery_get_broker_client
redis_client = celery_get_broker_client(self._celery_app)
depth = GaugeMetricFamily(
@@ -194,208 +232,6 @@ class QueueDepthCollector(_CachedCollector):
return None
class IndexAttemptCollector(_CachedCollector):
    """Queries Postgres for index attempt state on each scrape.

    Emits one gauge, ``onyx_index_attempts_active``, counting non-terminal
    index attempts per (status, source, tenant, connector, cc_pair).
    """

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
        # Flipped to True by configure(); until then _collect_fresh() emits nothing.
        self._configured: bool = False
        self._terminal_statuses: list = []

    def configure(self) -> None:
        """Call once DB engine is initialized."""
        # Imported lazily so this module can load before the DB layer is ready.
        from onyx.db.enums import IndexingStatus

        self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
        self._configured = True

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        """Build the active-index-attempt gauge across all tenants."""
        # No DB engine yet — report nothing rather than erroring the scrape.
        if not self._configured:
            return []

        # Lazy imports avoid import-time DB dependencies and circular imports.
        from onyx.db.engine.sql_engine import get_session_with_current_tenant
        from onyx.db.engine.tenant_utils import get_all_tenant_ids
        from onyx.db.index_attempt import get_active_index_attempts_for_metrics
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        attempts_gauge = GaugeMetricFamily(
            "onyx_index_attempts_active",
            "Number of non-terminal index attempts",
            labels=[
                "status",
                "source",
                "tenant_id",
                "connector_name",
                "cc_pair_id",
            ],
        )

        tenant_ids = get_all_tenant_ids()
        for tid in tenant_ids:
            # Defensive guard — get_all_tenant_ids() should never yield None,
            # but we guard here for API stability in case the contract changes.
            if tid is None:
                continue
            # Scope DB access to this tenant via the contextvar; reset in
            # `finally` so one tenant's failure cannot leak into the next.
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
            try:
                with get_session_with_current_tenant() as session:
                    rows = get_active_index_attempts_for_metrics(session)
                    for status, source, cc_id, cc_name, count in rows:
                        # Fall back to a synthetic name for unnamed cc pairs.
                        name_val = cc_name or f"cc_pair_{cc_id}"
                        attempts_gauge.add_metric(
                            [
                                status.value,
                                source.value,
                                tid,
                                name_val,
                                str(cc_id),
                            ],
                            count,
                        )
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        return [attempts_gauge]
class ConnectorHealthCollector(_CachedCollector):
    """Queries Postgres for connector health state on each scrape.

    Emits per-connector staleness, error-state, docs-indexed, and failure
    gauges, plus per-tenant status and error-count rollups.
    """

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
        # Flipped to True by configure(); until then _collect_fresh() emits nothing.
        self._configured: bool = False

    def configure(self) -> None:
        """Call once DB engine is initialized."""
        self._configured = True

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
        """Build all connector-health gauges across all tenants."""
        # No DB engine yet — report nothing rather than erroring the scrape.
        if not self._configured:
            return []

        # Lazy imports avoid import-time DB dependencies and circular imports.
        from onyx.db.connector_credential_pair import (
            get_connector_health_for_metrics,
        )
        from onyx.db.engine.sql_engine import get_session_with_current_tenant
        from onyx.db.engine.tenant_utils import get_all_tenant_ids
        from onyx.db.index_attempt import get_docs_indexed_by_cc_pair
        from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair
        from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

        staleness_gauge = GaugeMetricFamily(
            "onyx_connector_last_success_age_seconds",
            "Seconds since last successful index for this connector",
            labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
        )
        error_state_gauge = GaugeMetricFamily(
            "onyx_connector_in_error_state",
            "Whether the connector is in a repeated error state (1=yes, 0=no)",
            labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
        )
        by_status_gauge = GaugeMetricFamily(
            "onyx_connectors_by_status",
            "Number of connectors grouped by status",
            labels=["tenant_id", "status"],
        )
        error_total_gauge = GaugeMetricFamily(
            "onyx_connectors_in_error_total",
            "Total number of connectors in repeated error state",
            labels=["tenant_id"],
        )
        # Shared label schema for the two per-connector count gauges below.
        per_connector_labels = [
            "tenant_id",
            "source",
            "cc_pair_id",
            "connector_name",
        ]
        docs_success_gauge = GaugeMetricFamily(
            "onyx_connector_docs_indexed",
            "Total new documents indexed (90-day rolling sum) per connector",
            labels=per_connector_labels,
        )
        docs_error_gauge = GaugeMetricFamily(
            "onyx_connector_error_count",
            "Total number of failed index attempts per connector",
            labels=per_connector_labels,
        )

        # One timestamp so every staleness sample in this scrape is computed
        # against the same "now".
        now = datetime.now(tz=timezone.utc)

        tenant_ids = get_all_tenant_ids()
        for tid in tenant_ids:
            # Defensive guard — get_all_tenant_ids() should never yield None,
            # but we guard here for API stability in case the contract changes.
            if tid is None:
                continue
            # Scope DB access to this tenant via the contextvar; reset in
            # `finally` so one tenant's failure cannot leak into the next.
            token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
            try:
                with get_session_with_current_tenant() as session:
                    pairs = get_connector_health_for_metrics(session)
                    error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)
                    docs_by_cc = get_docs_indexed_by_cc_pair(session)
                    status_counts: dict[str, int] = {}
                    error_count = 0
                    for (
                        cc_id,
                        status,
                        in_error,
                        last_success,
                        cc_name,
                        source,
                    ) in pairs:
                        cc_id_str = str(cc_id)
                        source_val = source.value
                        # Fall back to a synthetic name for unnamed cc pairs.
                        name_val = cc_name or f"cc_pair_{cc_id}"
                        label_vals = [tid, source_val, cc_id_str, name_val]
                        # Connectors that never succeeded get no staleness sample.
                        if last_success is not None:
                            # Both `now` and `last_success` are timezone-aware
                            # (the DB column uses DateTime(timezone=True)),
                            # so subtraction is safe.
                            age = (now - last_success).total_seconds()
                            staleness_gauge.add_metric(label_vals, age)
                        error_state_gauge.add_metric(
                            label_vals,
                            1.0 if in_error else 0.0,
                        )
                        if in_error:
                            error_count += 1
                        docs_success_gauge.add_metric(
                            label_vals,
                            docs_by_cc.get(cc_id, 0),
                        )
                        docs_error_gauge.add_metric(
                            label_vals,
                            error_counts_by_cc.get(cc_id, 0),
                        )
                        status_val = status.value
                        status_counts[status_val] = status_counts.get(status_val, 0) + 1
                    # Per-tenant rollups, emitted once per tenant.
                    for status_val, count in status_counts.items():
                        by_status_gauge.add_metric([tid, status_val], count)
                    error_total_gauge.add_metric([tid], error_count)
            finally:
                CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

        return [
            staleness_gauge,
            error_state_gauge,
            by_status_gauge,
            error_total_gauge,
            docs_success_gauge,
            docs_error_gauge,
        ]
class RedisHealthCollector(_CachedCollector):
"""Collects Redis server health metrics (memory, clients, etc.)."""
@@ -411,8 +247,6 @@ class RedisHealthCollector(_CachedCollector):
if self._celery_app is None:
return []
from onyx.background.celery.celery_redis import celery_get_broker_client
redis_client = celery_get_broker_client(self._celery_app)
memory_used = GaugeMetricFamily(
@@ -495,7 +329,9 @@ class WorkerHeartbeatMonitor:
},
)
recv.capture(
limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True
limit=None,
timeout=self._HEARTBEAT_TIMEOUT_SECONDS,
wakeup=True,
)
except Exception:
if self._running:
@@ -553,6 +389,15 @@ class WorkerHealthCollector(_CachedCollector):
Reads worker status from ``WorkerHeartbeatMonitor`` which listens
to the Celery event stream via a single persistent connection.
TODO: every monitoring pod subscribes to the cluster-wide Celery event
stream, so each replica reports health for *all* workers in the cluster,
not just itself. Prometheus distinguishes the replicas via the ``instance``
label, so this doesn't break scraping, but it means N monitoring replicas
do N× the work and may emit slightly inconsistent snapshots of the same
cluster. The proper fix is to have each worker expose its own health (or
to elect a single monitoring replica as the reporter) rather than
broadcasting the full cluster view from every monitoring pod.
"""
def __init__(self, cache_ttl: float = 30.0) -> None:
@@ -571,10 +416,16 @@ class WorkerHealthCollector(_CachedCollector):
"onyx_celery_active_worker_count",
"Number of active Celery workers with recent heartbeats",
)
# Celery hostnames are ``{worker_type}@{nodename}`` (see supervisord.conf).
# Emitting only the worker_type as a label causes N replicas of the same
# type to collapse into identical timeseries within a single scrape,
# which Prometheus rejects as "duplicate sample for timestamp". Split
# the pieces into separate labels so each replica is distinct; callers
# can still ``sum by (worker_type)`` to recover the old aggregated view.
worker_up = GaugeMetricFamily(
"onyx_celery_worker_up",
"Whether a specific Celery worker is alive (1=up, 0=down)",
labels=["worker"],
labels=["worker_type", "hostname"],
)
try:
@@ -582,11 +433,15 @@ class WorkerHealthCollector(_CachedCollector):
alive_count = sum(1 for alive in status.values() if alive)
active_workers.add_metric([], alive_count)
for hostname in sorted(status):
# Use short name (before @) for single-host deployments,
# full hostname when multiple hosts share a worker type.
label = hostname.split("@")[0]
worker_up.add_metric([label], 1 if status[hostname] else 0)
for full_hostname in sorted(status):
worker_type, sep, host = full_hostname.partition("@")
if not sep:
# Hostname didn't contain "@" — fall back to using the
# whole string as the hostname with an empty type.
worker_type, host = "", full_hostname
worker_up.add_metric(
[worker_type, host], 1 if status[full_hostname] else 0
)
except Exception:
logger.debug("Failed to collect worker health metrics", exc_info=True)

View File

@@ -6,8 +6,6 @@ Called once by the monitoring celery worker after Redis and DB are ready.
from celery import Celery
from prometheus_client.registry import REGISTRY
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
@@ -21,8 +19,6 @@ logger = setup_logger()
# module level ensures they survive the lifetime of the worker process and are
# only registered with the Prometheus registry once.
_queue_collector = QueueDepthCollector()
_attempt_collector = IndexAttemptCollector()
_connector_collector = ConnectorHealthCollector()
_redis_health_collector = RedisHealthCollector()
_worker_health_collector = WorkerHealthCollector()
_heartbeat_monitor: WorkerHeartbeatMonitor | None = None
@@ -34,6 +30,9 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
Args:
celery_app: The Celery application instance. Used to obtain a
broker Redis client on each scrape for queue depth metrics.
Note: connector health and index attempt metrics are push-based
(see connector_health_metrics.py) and do not use collectors.
"""
_queue_collector.set_celery_app(celery_app)
_redis_health_collector.set_celery_app(celery_app)
@@ -47,13 +46,8 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
_heartbeat_monitor.start()
_worker_health_collector.set_monitor(_heartbeat_monitor)
_attempt_collector.configure()
_connector_collector.configure()
for collector in (
_queue_collector,
_attempt_collector,
_connector_collector,
_redis_health_collector,
_worker_health_collector,
):

View File

@@ -27,6 +27,8 @@ _DEFAULT_PORTS: dict[str, int] = {
"docfetching": 9092,
"docprocessing": 9093,
"heavy": 9094,
"light": 9095,
"primary": 9097,
}
_server_started = False

View File

@@ -28,14 +28,14 @@ PRUNING_ENUMERATION_DURATION = Histogram(
"onyx_pruning_enumeration_duration_seconds",
"Duration of document ID enumeration from the source connector during pruning",
["connector_type"],
buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
buckets=[5, 60, 600, 1800, 3600, 10800, 21600],
)
PRUNING_DIFF_DURATION = Histogram(
"onyx_pruning_diff_duration_seconds",
"Duration of diff computation and subtask dispatch during pruning",
["connector_type"],
buckets=[1, 5, 15, 30, 60, 120, 300, 600, 1800, 3600],
buckets=[0.1, 0.25, 0.5, 1, 2, 5, 15, 30, 60],
)
PRUNING_RATE_LIMIT_ERRORS = Counter(

View File

@@ -63,6 +63,7 @@ from onyx.db.persona import get_persona_by_id
from onyx.db.usage import increment_usage
from onyx.db.usage import UsageType
from onyx.db.user_file import get_file_id_by_user_file_id
from onyx.db.user_file import user_can_access_chat_file
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.file_store.file_store import get_default_file_store
@@ -866,14 +867,18 @@ def seed_chat_from_slack(
def fetch_chat_file(
file_id: str,
request: Request,
_: User = Depends(require_permission(Permission.BASIC_ACCESS)),
user: User = Depends(require_permission(Permission.BASIC_ACCESS)),
db_session: Session = Depends(get_session),
) -> Response:
# For user files, we need to get the file id from the user file id
file_id_from_user_file = get_file_id_by_user_file_id(file_id, db_session)
file_id_from_user_file = get_file_id_by_user_file_id(file_id, user.id, db_session)
if file_id_from_user_file:
file_id = file_id_from_user_file
elif not user_can_access_chat_file(file_id, user.id, db_session):
# Return 404 (rather than 403) so callers cannot probe for file
# existence across ownership boundaries.
raise OnyxError(OnyxErrorCode.NOT_FOUND, "File not found")
file_store = get_default_file_store()
file_record = file_store.read_file_record(file_id)
@@ -981,11 +986,21 @@ async def search_chats(
@router.post("/stop-chat-session/{chat_session_id}", tags=PUBLIC_API_TAGS)
def stop_chat_session(
chat_session_id: UUID,
user: User = Depends(require_permission(Permission.BASIC_ACCESS)), # noqa: ARG001
user: User = Depends(require_permission(Permission.BASIC_ACCESS)),
db_session: Session = Depends(get_session),
) -> dict[str, str]:
"""
Stop a chat session by setting a stop signal.
This endpoint is called by the frontend when the user clicks the stop button.
"""
try:
get_chat_session_by_id(
chat_session_id=chat_session_id,
user_id=user.id,
db_session=db_session,
)
except ValueError:
raise OnyxError(OnyxErrorCode.SESSION_NOT_FOUND, "Chat session not found")
set_fence(chat_session_id, get_cache_backend(), True)
return {"message": "Chat session stopped"}

View File

@@ -208,12 +208,6 @@ class PythonToolOverrideKwargs(BaseModel):
chat_files: list[ChatFile] = []
class ImageGenerationToolOverrideKwargs(BaseModel):
"""Override kwargs for image generation tool calls."""
recent_generated_image_file_ids: list[str] = []
class SearchToolRunContext(BaseModel):
emitter: Emitter

View File

@@ -10,6 +10,7 @@ from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import PersonaSearchInfo
from onyx.db.engine.sql_engine import get_session_with_current_tenant_if_none
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.mcp import get_all_mcp_tools_for_server
@@ -113,10 +114,10 @@ def _get_image_generation_config(llm: LLM, db_session: Session) -> LLMConfig:
def construct_tools(
persona: Persona,
db_session: Session,
emitter: Emitter,
user: User,
llm: LLM,
db_session: Session | None = None,
search_tool_config: SearchToolConfig | None = None,
custom_tool_config: CustomToolConfig | None = None,
file_reader_tool_config: FileReaderToolConfig | None = None,
@@ -131,6 +132,33 @@ def construct_tools(
``attached_documents``, and ``hierarchy_nodes`` already eager-loaded
(e.g. via ``eager_load_persona=True`` or ``eager_load_for_tools=True``)
to avoid lazy SQL queries after the session may have been flushed."""
with get_session_with_current_tenant_if_none(db_session) as db_session:
return _construct_tools_impl(
persona=persona,
db_session=db_session,
emitter=emitter,
user=user,
llm=llm,
search_tool_config=search_tool_config,
custom_tool_config=custom_tool_config,
file_reader_tool_config=file_reader_tool_config,
allowed_tool_ids=allowed_tool_ids,
search_usage_forcing_setting=search_usage_forcing_setting,
)
def _construct_tools_impl(
persona: Persona,
db_session: Session,
emitter: Emitter,
user: User,
llm: LLM,
search_tool_config: SearchToolConfig | None = None,
custom_tool_config: CustomToolConfig | None = None,
file_reader_tool_config: FileReaderToolConfig | None = None,
allowed_tool_ids: list[int] | None = None,
search_usage_forcing_setting: SearchToolUsage = SearchToolUsage.AUTO,
) -> dict[int, list[Tool]]:
tool_dict: dict[int, list[Tool]] = {}
# Log which tools are attached to the persona for debugging

View File

@@ -26,7 +26,6 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
@@ -48,7 +47,7 @@ PROMPT_FIELD = "prompt"
REFERENCE_IMAGE_FILE_IDS_FIELD = "reference_image_file_ids"
class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
class ImageGenerationTool(Tool[None]):
NAME = "generate_image"
DESCRIPTION = "Generate an image based on a prompt. Do not use unless the user specifically requests an image."
DISPLAY_NAME = "Image Generation"
@@ -142,8 +141,11 @@ class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
REFERENCE_IMAGE_FILE_IDS_FIELD: {
"type": "array",
"description": (
"Optional image file IDs to use as reference context for edits/variations. "
"Use the file_id values returned by previous generate_image calls."
"Optional file_ids of existing images to edit or use as reference;"
" the first is the primary edit source."
" Get file_ids from `[attached image — file_id: <id>]` tags on"
" user-attached images or from prior generate_image tool responses."
" Omit for a fresh, unrelated generation."
),
"items": {
"type": "string",
@@ -254,41 +256,31 @@ class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
def _resolve_reference_image_file_ids(
self,
llm_kwargs: dict[str, Any],
override_kwargs: ImageGenerationToolOverrideKwargs | None,
) -> list[str]:
raw_reference_ids = llm_kwargs.get(REFERENCE_IMAGE_FILE_IDS_FIELD)
if raw_reference_ids is not None:
if not isinstance(raw_reference_ids, list) or not all(
isinstance(file_id, str) for file_id in raw_reference_ids
):
raise ToolCallException(
message=(
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
),
llm_facing_message=(
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
),
)
reference_image_file_ids = [
file_id.strip() for file_id in raw_reference_ids if file_id.strip()
]
elif (
override_kwargs
and override_kwargs.recent_generated_image_file_ids
and self.img_provider.supports_reference_images
):
# If no explicit reference was provided, default to the most recently generated image.
reference_image_file_ids = [
override_kwargs.recent_generated_image_file_ids[-1]
]
else:
reference_image_file_ids = []
if raw_reference_ids is None:
# No references requested — plain generation.
return []
# Deduplicate while preserving order.
if not isinstance(raw_reference_ids, list) or not all(
isinstance(file_id, str) for file_id in raw_reference_ids
):
raise ToolCallException(
message=(
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
),
llm_facing_message=(
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
),
)
# Deduplicate while preserving order (first occurrence wins, so the
# LLM's intended "primary edit source" stays at index 0).
deduped_reference_image_ids: list[str] = []
seen_ids: set[str] = set()
for file_id in reference_image_file_ids:
if file_id in seen_ids:
for file_id in raw_reference_ids:
file_id = file_id.strip()
if not file_id or file_id in seen_ids:
continue
seen_ids.add(file_id)
deduped_reference_image_ids.append(file_id)
@@ -302,14 +294,14 @@ class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
f"Reference images requested but provider '{self.provider}' does not support image-editing context."
),
llm_facing_message=(
"This image provider does not support editing from previous image context. "
"This image provider does not support editing from existing images. "
"Try text-only generation, or switch to a provider/model that supports image edits."
),
)
max_reference_images = self.img_provider.max_reference_images
if max_reference_images > 0:
return deduped_reference_image_ids[-max_reference_images:]
return deduped_reference_image_ids[:max_reference_images]
return deduped_reference_image_ids
def _load_reference_images(
@@ -358,7 +350,7 @@ class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
def run(
self,
placement: Placement,
override_kwargs: ImageGenerationToolOverrideKwargs | None = None,
override_kwargs: None = None, # noqa: ARG002
**llm_kwargs: Any,
) -> ToolResponse:
if PROMPT_FIELD not in llm_kwargs:
@@ -373,7 +365,6 @@ class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
shape = ImageShape(llm_kwargs.get("shape", ImageShape.SQUARE.value))
reference_image_file_ids = self._resolve_reference_image_file_ids(
llm_kwargs=llm_kwargs,
override_kwargs=override_kwargs,
)
reference_images = self._load_reference_images(reference_image_file_ids)

View File

@@ -1,4 +1,3 @@
import json
import traceback
from collections import defaultdict
from typing import Any
@@ -14,7 +13,6 @@ from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatFile
from onyx.tools.models import ChatMinimalTextMessage
from onyx.tools.models import ImageGenerationToolOverrideKwargs
from onyx.tools.models import OpenURLToolOverrideKwargs
from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import PythonToolOverrideKwargs
@@ -24,9 +22,6 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
@@ -110,63 +105,6 @@ def _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff
return merged_calls
def _extract_image_file_ids_from_tool_response_message(
message: str,
) -> list[str]:
try:
parsed_message = json.loads(message)
except json.JSONDecodeError:
return []
parsed_items: list[Any] = (
parsed_message if isinstance(parsed_message, list) else [parsed_message]
)
file_ids: list[str] = []
for item in parsed_items:
if not isinstance(item, dict):
continue
file_id = item.get("file_id")
if isinstance(file_id, str):
file_ids.append(file_id)
return file_ids
def _extract_recent_generated_image_file_ids(
message_history: list[ChatMessageSimple],
) -> list[str]:
tool_name_by_tool_call_id: dict[str, str] = {}
recent_image_file_ids: list[str] = []
seen_file_ids: set[str] = set()
for message in message_history:
if message.message_type == MessageType.ASSISTANT and message.tool_calls:
for tool_call in message.tool_calls:
tool_name_by_tool_call_id[tool_call.tool_call_id] = tool_call.tool_name
continue
if (
message.message_type != MessageType.TOOL_CALL_RESPONSE
or not message.tool_call_id
):
continue
tool_name = tool_name_by_tool_call_id.get(message.tool_call_id)
if tool_name != ImageGenerationTool.NAME:
continue
for file_id in _extract_image_file_ids_from_tool_response_message(
message.message
):
if file_id in seen_file_ids:
continue
seen_file_ids.add(file_id)
recent_image_file_ids.append(file_id)
return recent_image_file_ids
def _safe_run_single_tool(
tool: Tool,
tool_call: ToolCallKickoff,
@@ -386,9 +324,6 @@ def run_tool_calls(
url_to_citation: dict[str, int] = {
url: citation_num for citation_num, url in citation_mapping.items()
}
recent_generated_image_file_ids = _extract_recent_generated_image_file_ids(
message_history
)
# Prepare all tool calls with their override_kwargs
# Each tool gets a unique starting citation number to avoid conflicts when running in parallel
@@ -405,7 +340,6 @@ def run_tool_calls(
| WebSearchToolOverrideKwargs
| OpenURLToolOverrideKwargs
| PythonToolOverrideKwargs
| ImageGenerationToolOverrideKwargs
| MemoryToolOverrideKwargs
| None
) = None
@@ -454,10 +388,6 @@ def run_tool_calls(
override_kwargs = PythonToolOverrideKwargs(
chat_files=chat_files or [],
)
elif isinstance(tool, ImageGenerationTool):
override_kwargs = ImageGenerationToolOverrideKwargs(
recent_generated_image_file_ids=recent_generated_image_file_ids
)
elif isinstance(tool, MemoryTool):
override_kwargs = MemoryToolOverrideKwargs(
user_name=(

View File

@@ -214,7 +214,9 @@ distro==1.9.0
dnspython==2.8.0
# via email-validator
docstring-parser==0.17.0
# via cyclopts
# via
# cyclopts
# google-cloud-aiplatform
docutils==0.22.3
# via rich-rst
dropbox==12.0.2
@@ -270,7 +272,13 @@ gitdb==4.0.12
gitpython==3.1.45
# via braintrust
google-api-core==2.28.1
# via google-api-python-client
# via
# google-api-python-client
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
google-api-python-client==2.86.0
google-auth==2.48.0
# via
@@ -278,21 +286,61 @@ google-auth==2.48.0
# google-api-python-client
# google-auth-httplib2
# google-auth-oauthlib
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
# google-genai
# kubernetes
google-auth-httplib2==0.1.0
# via google-api-python-client
google-auth-oauthlib==1.0.0
google-cloud-aiplatform==1.133.0
# via litellm
google-cloud-bigquery==3.41.0
# via google-cloud-aiplatform
google-cloud-core==2.5.1
# via
# google-cloud-bigquery
# google-cloud-storage
google-cloud-resource-manager==1.17.0
# via google-cloud-aiplatform
google-cloud-storage==3.10.1
# via google-cloud-aiplatform
google-crc32c==1.8.0
# via
# google-cloud-storage
# google-resumable-media
google-genai==1.52.0
# via onyx
# via
# google-cloud-aiplatform
# onyx
google-resumable-media==2.8.2
# via
# google-cloud-bigquery
# google-cloud-storage
googleapis-common-protos==1.72.0
# via
# google-api-core
# grpc-google-iam-v1
# grpcio-status
# opentelemetry-exporter-otlp-proto-http
greenlet==3.2.4
# via
# playwright
# sqlalchemy
grpc-google-iam-v1==0.14.4
# via google-cloud-resource-manager
grpcio==1.80.0
# via
# google-api-core
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
grpcio-status==1.80.0
# via google-api-core
h11==0.16.0
# via
# httpcore
@@ -562,6 +610,8 @@ packaging==24.2
# dask
# distributed
# fastmcp
# google-cloud-aiplatform
# google-cloud-bigquery
# huggingface-hub
# jira
# kombu
@@ -608,12 +658,19 @@ propcache==0.4.1
# aiohttp
# yarl
proto-plus==1.26.1
# via google-api-core
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
protobuf==6.33.5
# via
# ddtrace
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
# onnxruntime
# opentelemetry-proto
# proto-plus
@@ -646,6 +703,7 @@ pydantic==2.11.7
# exa-py
# fastapi
# fastmcp
# google-cloud-aiplatform
# google-genai
# langchain-core
# langfuse
@@ -702,6 +760,7 @@ python-dateutil==2.8.2
# botocore
# celery
# dateparser
# google-cloud-bigquery
# htmldate
# hubspot-api-client
# kubernetes
@@ -780,6 +839,8 @@ requests==2.33.0
# dropbox
# exa-py
# google-api-core
# google-cloud-bigquery
# google-cloud-storage
# google-genai
# hubspot-api-client
# huggingface-hub
@@ -951,7 +1012,9 @@ typing-extensions==4.15.0
# exa-py
# exceptiongroup
# fastapi
# google-cloud-aiplatform
# google-genai
# grpcio
# huggingface-hub
# jira
# langchain-core

View File

@@ -113,6 +113,8 @@ distlib==0.4.0
# via virtualenv
distro==1.9.0
# via openai
docstring-parser==0.17.0
# via google-cloud-aiplatform
durationpy==0.10
# via kubernetes
execnet==2.1.2
@@ -140,14 +142,65 @@ frozenlist==1.8.0
# aiosignal
fsspec==2025.10.0
# via huggingface-hub
google-api-core==2.28.1
# via
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
google-auth==2.48.0
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
# google-genai
# kubernetes
google-cloud-aiplatform==1.133.0
# via litellm
google-cloud-bigquery==3.41.0
# via google-cloud-aiplatform
google-cloud-core==2.5.1
# via
# google-cloud-bigquery
# google-cloud-storage
google-cloud-resource-manager==1.17.0
# via google-cloud-aiplatform
google-cloud-storage==3.10.1
# via google-cloud-aiplatform
google-crc32c==1.8.0
# via
# google-cloud-storage
# google-resumable-media
google-genai==1.52.0
# via onyx
# via
# google-cloud-aiplatform
# onyx
google-resumable-media==2.8.2
# via
# google-cloud-bigquery
# google-cloud-storage
googleapis-common-protos==1.72.0
# via
# google-api-core
# grpc-google-iam-v1
# grpcio-status
greenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
# via sqlalchemy
grpc-google-iam-v1==0.14.4
# via google-cloud-resource-manager
grpcio==1.80.0
# via
# google-api-core
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
grpcio-status==1.80.0
# via google-api-core
h11==0.16.0
# via
# httpcore
@@ -264,6 +317,8 @@ openapi-generator-cli==7.17.0
packaging==24.2
# via
# black
# google-cloud-aiplatform
# google-cloud-bigquery
# hatchling
# huggingface-hub
# ipykernel
@@ -304,6 +359,20 @@ propcache==0.4.1
# via
# aiohttp
# yarl
proto-plus==1.26.1
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
protobuf==6.33.5
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
# proto-plus
psutil==7.1.3
# via ipykernel
ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
@@ -325,6 +394,7 @@ pydantic==2.11.7
# agent-client-protocol
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# litellm
# mcp
@@ -359,6 +429,7 @@ python-dateutil==2.8.2
# via
# aiobotocore
# botocore
# google-cloud-bigquery
# jupyter-client
# kubernetes
# matplotlib
@@ -391,6 +462,9 @@ reorder-python-imports-black==3.14.0
requests==2.33.0
# via
# cohere
# google-api-core
# google-cloud-bigquery
# google-cloud-storage
# google-genai
# huggingface-hub
# kubernetes
@@ -485,7 +559,9 @@ typing-extensions==4.15.0
# celery-types
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# grpcio
# huggingface-hub
# ipython
# mcp

View File

@@ -86,6 +86,8 @@ discord-py==2.4.0
# via onyx
distro==1.9.0
# via openai
docstring-parser==0.17.0
# via google-cloud-aiplatform
durationpy==0.10
# via kubernetes
fastapi==0.133.1
@@ -102,12 +104,63 @@ frozenlist==1.8.0
# aiosignal
fsspec==2025.10.0
# via huggingface-hub
google-api-core==2.28.1
# via
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
google-auth==2.48.0
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
# google-genai
# kubernetes
google-cloud-aiplatform==1.133.0
# via litellm
google-cloud-bigquery==3.41.0
# via google-cloud-aiplatform
google-cloud-core==2.5.1
# via
# google-cloud-bigquery
# google-cloud-storage
google-cloud-resource-manager==1.17.0
# via google-cloud-aiplatform
google-cloud-storage==3.10.1
# via google-cloud-aiplatform
google-crc32c==1.8.0
# via
# google-cloud-storage
# google-resumable-media
google-genai==1.52.0
# via onyx
# via
# google-cloud-aiplatform
# onyx
google-resumable-media==2.8.2
# via
# google-cloud-bigquery
# google-cloud-storage
googleapis-common-protos==1.72.0
# via
# google-api-core
# grpc-google-iam-v1
# grpcio-status
grpc-google-iam-v1==0.14.4
# via google-cloud-resource-manager
grpcio==1.80.0
# via
# google-api-core
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
grpcio-status==1.80.0
# via google-api-core
h11==0.16.0
# via
# httpcore
@@ -178,7 +231,10 @@ openai==2.14.0
# litellm
# onyx
packaging==24.2
# via huggingface-hub
# via
# google-cloud-aiplatform
# google-cloud-bigquery
# huggingface-hub
parameterized==0.9.0
# via cohere
posthog==3.7.4
@@ -192,6 +248,20 @@ propcache==0.4.1
# via
# aiohttp
# yarl
proto-plus==1.26.1
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
protobuf==6.33.5
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
# proto-plus
py==1.11.0
# via retry
pyasn1==0.6.3
@@ -207,6 +277,7 @@ pydantic==2.11.7
# agent-client-protocol
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# litellm
# mcp
@@ -223,6 +294,7 @@ python-dateutil==2.8.2
# via
# aiobotocore
# botocore
# google-cloud-bigquery
# kubernetes
# posthog
python-dotenv==1.1.1
@@ -246,6 +318,9 @@ regex==2025.11.3
requests==2.33.0
# via
# cohere
# google-api-core
# google-cloud-bigquery
# google-cloud-storage
# google-genai
# huggingface-hub
# kubernetes
@@ -305,7 +380,9 @@ typing-extensions==4.15.0
# anyio
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# grpcio
# huggingface-hub
# mcp
# openai

View File

@@ -101,6 +101,8 @@ discord-py==2.4.0
# via onyx
distro==1.9.0
# via openai
docstring-parser==0.17.0
# via google-cloud-aiplatform
durationpy==0.10
# via kubernetes
einops==0.8.1
@@ -125,12 +127,63 @@ fsspec==2025.10.0
# via
# huggingface-hub
# torch
google-api-core==2.28.1
# via
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
google-auth==2.48.0
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-bigquery
# google-cloud-core
# google-cloud-resource-manager
# google-cloud-storage
# google-genai
# kubernetes
google-cloud-aiplatform==1.133.0
# via litellm
google-cloud-bigquery==3.41.0
# via google-cloud-aiplatform
google-cloud-core==2.5.1
# via
# google-cloud-bigquery
# google-cloud-storage
google-cloud-resource-manager==1.17.0
# via google-cloud-aiplatform
google-cloud-storage==3.10.1
# via google-cloud-aiplatform
google-crc32c==1.8.0
# via
# google-cloud-storage
# google-resumable-media
google-genai==1.52.0
# via onyx
# via
# google-cloud-aiplatform
# onyx
google-resumable-media==2.8.2
# via
# google-cloud-bigquery
# google-cloud-storage
googleapis-common-protos==1.72.0
# via
# google-api-core
# grpc-google-iam-v1
# grpcio-status
grpc-google-iam-v1==0.14.4
# via google-cloud-resource-manager
grpcio==1.80.0
# via
# google-api-core
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
grpcio-status==1.80.0
# via google-api-core
h11==0.16.0
# via
# httpcore
@@ -259,6 +312,8 @@ openai==2.14.0
packaging==24.2
# via
# accelerate
# google-cloud-aiplatform
# google-cloud-bigquery
# huggingface-hub
# kombu
# transformers
@@ -278,6 +333,20 @@ propcache==0.4.1
# via
# aiohttp
# yarl
proto-plus==1.26.1
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
protobuf==6.33.5
# via
# google-api-core
# google-cloud-aiplatform
# google-cloud-resource-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
# proto-plus
psutil==7.1.3
# via accelerate
py==1.11.0
@@ -295,6 +364,7 @@ pydantic==2.11.7
# agent-client-protocol
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# litellm
# mcp
@@ -312,6 +382,7 @@ python-dateutil==2.8.2
# aiobotocore
# botocore
# celery
# google-cloud-bigquery
# kubernetes
python-dotenv==1.1.1
# via
@@ -338,6 +409,9 @@ regex==2025.11.3
requests==2.33.0
# via
# cohere
# google-api-core
# google-cloud-bigquery
# google-cloud-storage
# google-genai
# huggingface-hub
# kubernetes
@@ -425,7 +499,9 @@ typing-extensions==4.15.0
# anyio
# cohere
# fastapi
# google-cloud-aiplatform
# google-genai
# grpcio
# huggingface-hub
# mcp
# openai

View File

@@ -7,7 +7,6 @@ import pytest
from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
@pytest.fixture
@@ -32,18 +31,20 @@ def test_gong_basic(
mock_get_api_key: MagicMock, # noqa: ARG001
gong_connector: GongConnector,
) -> None:
doc_batch_generator = gong_connector.poll_source(0, time.time())
doc_batch = next(doc_batch_generator)
with pytest.raises(StopIteration):
next(doc_batch_generator)
assert len(doc_batch) == 2
checkpoint = gong_connector.build_dummy_checkpoint()
docs: list[Document] = []
for doc in doc_batch:
if not isinstance(doc, HierarchyNode):
docs.append(doc)
while checkpoint.has_more:
generator = gong_connector.load_from_checkpoint(0, time.time(), checkpoint)
try:
while True:
item = next(generator)
if isinstance(item, Document):
docs.append(item)
except StopIteration as e:
checkpoint = e.value
assert len(docs) == 2
assert docs[0].semantic_identifier == "test with chris"
assert docs[1].semantic_identifier == "Testing Gong"

View File

@@ -38,38 +38,41 @@ class TestAddMemory:
def test_add_memory_creates_row(self, db_session: Session, test_user: User) -> None:
"""Verify that add_memory inserts a new Memory row."""
user_id = test_user.id
memory = add_memory(
memory_id = add_memory(
user_id=user_id,
memory_text="User prefers dark mode",
db_session=db_session,
)
assert memory.id is not None
assert memory.user_id == user_id
assert memory.memory_text == "User prefers dark mode"
assert memory_id is not None
# Verify it persists
fetched = db_session.get(Memory, memory.id)
fetched = db_session.get(Memory, memory_id)
assert fetched is not None
assert fetched.user_id == user_id
assert fetched.memory_text == "User prefers dark mode"
def test_add_multiple_memories(self, db_session: Session, test_user: User) -> None:
"""Verify that multiple memories can be added for the same user."""
user_id = test_user.id
m1 = add_memory(
m1_id = add_memory(
user_id=user_id,
memory_text="Favorite color is blue",
db_session=db_session,
)
m2 = add_memory(
m2_id = add_memory(
user_id=user_id,
memory_text="Works in engineering",
db_session=db_session,
)
assert m1.id != m2.id
assert m1.memory_text == "Favorite color is blue"
assert m2.memory_text == "Works in engineering"
assert m1_id != m2_id
fetched_m1 = db_session.get(Memory, m1_id)
fetched_m2 = db_session.get(Memory, m2_id)
assert fetched_m1 is not None
assert fetched_m2 is not None
assert fetched_m1.memory_text == "Favorite color is blue"
assert fetched_m2.memory_text == "Works in engineering"
class TestUpdateMemoryAtIndex:
@@ -82,15 +85,17 @@ class TestUpdateMemoryAtIndex:
add_memory(user_id=user_id, memory_text="Memory 1", db_session=db_session)
add_memory(user_id=user_id, memory_text="Memory 2", db_session=db_session)
updated = update_memory_at_index(
updated_id = update_memory_at_index(
user_id=user_id,
index=1,
new_text="Updated Memory 1",
db_session=db_session,
)
assert updated is not None
assert updated.memory_text == "Updated Memory 1"
assert updated_id is not None
fetched = db_session.get(Memory, updated_id)
assert fetched is not None
assert fetched.memory_text == "Updated Memory 1"
def test_update_memory_at_out_of_range_index(
self, db_session: Session, test_user: User
@@ -167,7 +172,7 @@ class TestMemoryCap:
assert len(rows_before) == MAX_MEMORIES_PER_USER
# Add one more — should evict the oldest
new_memory = add_memory(
new_memory_id = add_memory(
user_id=user_id,
memory_text="New memory after cap",
db_session=db_session,
@@ -181,7 +186,7 @@ class TestMemoryCap:
# Oldest ("Memory 0") should be gone; "Memory 1" is now the oldest
assert rows_after[0].memory_text == "Memory 1"
# Newest should be the one we just added
assert rows_after[-1].id == new_memory.id
assert rows_after[-1].id == new_memory_id
assert rows_after[-1].memory_text == "New memory after cap"
@@ -221,22 +226,26 @@ class TestGetMemoriesWithUserId:
user_id = test_user_no_memories.id
# Add a memory
memory = add_memory(
memory_id = add_memory(
user_id=user_id,
memory_text="Memory with use_memories off",
db_session=db_session,
)
assert memory.memory_text == "Memory with use_memories off"
fetched = db_session.get(Memory, memory_id)
assert fetched is not None
assert fetched.memory_text == "Memory with use_memories off"
# Update that memory
updated = update_memory_at_index(
updated_id = update_memory_at_index(
user_id=user_id,
index=0,
new_text="Updated memory with use_memories off",
db_session=db_session,
)
assert updated is not None
assert updated.memory_text == "Updated memory with use_memories off"
assert updated_id is not None
fetched_updated = db_session.get(Memory, updated_id)
assert fetched_updated is not None
assert fetched_updated.memory_text == "Updated memory with use_memories off"
# Verify get_memories returns the updated memory
context = get_memories(test_user_no_memories, db_session)

View File

@@ -183,3 +183,30 @@ def test_chat_session_not_found_returns_404(basic_user: DATestUser) -> None:
"""Verify unknown IDs return 404."""
response = _get_chat_session(str(uuid4()), basic_user)
assert response.status_code == 404
def _stop_chat_session(chat_session_id: str, user: DATestUser) -> requests.Response:
    """POST to the stop endpoint for the given chat session, acting as `user`."""
    url = f"{API_SERVER_URL}/chat/stop-chat-session/{chat_session_id}"
    return requests.post(url, headers=user.headers, cookies=user.cookies)
def test_stop_chat_session_rejects_non_owner(
    basic_user: DATestUser, second_user: DATestUser
) -> None:
    """Non-owner callers must not be able to stop another user's chat session."""
    chat_session = ChatSessionManager.create(user_performing_action=basic_user)
    session_id = str(chat_session.id)

    # The owner may stop their own session.
    assert _stop_chat_session(session_id, basic_user).status_code == 200

    # A different authenticated user is told the session does not exist.
    assert _stop_chat_session(session_id, second_user).status_code == 404

    # Unknown session IDs are rejected the same way.
    assert _stop_chat_session(str(uuid4()), second_user).status_code == 404

View File

@@ -8,8 +8,10 @@ import io
from typing import NamedTuple
import pytest
import requests
from onyx.file_store.models import FileDescriptor
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
@@ -119,3 +121,31 @@ def test_public_assistant_with_user_files(
assert (
len(chat_history) >= 2
), "Expected at least 2 messages (user message and assistant response)"
def test_cannot_download_other_users_file_via_chat_file_endpoint(
    user_file_setup: UserFileTestSetup,
) -> None:
    """The chat file endpoint must deny downloads to users who do not own the file."""
    storage_file_id = user_file_setup.user1_file_descriptor["id"]
    user_file_id = user_file_setup.user1_file_id

    # Sanity check: the owner can fetch their own file.
    owner_response = requests.get(
        f"{API_SERVER_URL}/chat/file/{storage_file_id}",
        headers=user_file_setup.user1_file_owner.headers,
    )
    assert owner_response.status_code == 200
    assert owner_response.content, "Owner should receive the file contents"

    # Both identifiers for the same file must be denied to the non-owner.
    for file_id in (storage_file_id, user_file_id):
        non_owner_response = requests.get(
            f"{API_SERVER_URL}/chat/file/{file_id}",
            headers=user_file_setup.user2_non_owner.headers,
        )
        assert non_owner_response.status_code in (403, 404), (
            f"Expected access denied for non-owner, got {non_owner_response.status_code} "
            f"when fetching file_id={file_id}"
        )
        assert non_owner_response.content != owner_response.content

View File

@@ -301,7 +301,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_stop),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",
@@ -332,7 +331,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_one),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",
@@ -363,7 +361,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=emit_one),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",
@@ -391,7 +388,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=always_fail),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",
@@ -423,7 +419,6 @@ class TestRunModels:
),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",
@@ -456,7 +451,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=slow_llm),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",
@@ -497,7 +491,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=slow_llm),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch(
"onyx.chat.process_message.llm_loop_completion_handle"
) as mock_handle,
@@ -519,7 +512,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop"),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch(
"onyx.chat.process_message.llm_loop_completion_handle"
) as mock_handle,
@@ -542,7 +534,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=always_fail),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch(
"onyx.chat.process_message.llm_loop_completion_handle"
) as mock_handle,
@@ -596,7 +587,6 @@ class TestRunModels:
),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch(
"onyx.chat.process_message.llm_loop_completion_handle",
side_effect=lambda *_, **__: completion_called.set(),
@@ -653,7 +643,6 @@ class TestRunModels:
),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch(
"onyx.chat.process_message.llm_loop_completion_handle",
side_effect=lambda *_, **__: completion_called.set(),
@@ -706,7 +695,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop", side_effect=fail_model_0),
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch(
"onyx.chat.process_message.llm_loop_completion_handle"
) as mock_handle,
@@ -736,7 +724,6 @@ class TestRunModels:
patch("onyx.chat.process_message.run_llm_loop") as mock_llm,
patch("onyx.chat.process_message.run_deep_research_llm_loop"),
patch("onyx.chat.process_message.construct_tools", return_value={}),
patch("onyx.chat.process_message.get_session_with_current_tenant"),
patch("onyx.chat.process_message.llm_loop_completion_handle"),
patch(
"onyx.chat.process_message.get_llm_token_counter",

View File

@@ -0,0 +1,53 @@
import datetime
import pytest
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
def test_time_str_to_utc() -> None:
    """Well-formed date strings in various offsets normalize to UTC datetimes."""
    cases = [
        (
            "Tue, 5 Oct 2021 09:38:25 GMT",
            datetime.datetime(2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc),
        ),
        (
            "Sat, 24 Jul 2021 09:21:20 +0000 (UTC)",
            datetime.datetime(2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc),
        ),
        (
            "Thu, 29 Jul 2021 04:20:37 -0400 (EDT)",
            datetime.datetime(2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc),
        ),
        (
            "30 Jun 2023 18:45:01 +0300",
            datetime.datetime(2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc),
        ),
        (
            "22 Mar 2020 20:12:18 +0000 (GMT)",
            datetime.datetime(2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc),
        ),
        (
            "Date: Wed, 27 Aug 2025 11:40:00 +0200",
            datetime.datetime(2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc),
        ),
    ]
    for raw, expected in cases:
        assert time_str_to_utc(raw) == expected
def test_time_str_to_utc_recovers_from_concatenated_headers() -> None:
    """Date strings fused with a following mail header still parse.

    The timezone offset is dropped during recovery, so the expected value is
    the wall-clock time tagged as UTC rather than the original offset.
    """
    fused_to_expected = {
        'Sat, 3 Nov 2007 14:33:28 -0200To: "jason" <jason@example.net>': (
            datetime.datetime(2007, 11, 3, 14, 33, 28, tzinfo=datetime.timezone.utc)
        ),
        "Fri, 20 Feb 2015 10:30:00 +0500Cc: someone@example.com": (
            datetime.datetime(2015, 2, 20, 10, 30, 0, tzinfo=datetime.timezone.utc)
        ),
    }
    for fused, expected in fused_to_expected.items():
        assert time_str_to_utc(fused) == expected
def test_time_str_to_utc_raises_on_impossible_dates() -> None:
    """Inputs that cannot be real dates must raise ValueError."""
    impossible_inputs = [
        "Wed, 33 Sep 2007 13:42:59 +0100",  # day out of range
        "Thu, 11 Oct 2007 31:50:55 +0900",  # hour out of range
        "not a date at all",
        "",
    ]
    for bad in impossible_inputs:
        with pytest.raises(ValueError):
            time_str_to_utc(bad)

View File

@@ -1,3 +1,4 @@
import copy
import datetime
import json
import os
@@ -8,7 +9,6 @@ from unittest.mock import patch
from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.connectors.gmail.connector import _build_time_range_query
from onyx.connectors.gmail.connector import GmailCheckpoint
from onyx.connectors.gmail.connector import GmailConnector
@@ -51,29 +51,43 @@ def test_build_time_range_query() -> None:
assert query is None
def test_time_str_to_utc() -> None:
str_to_dt = {
"Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
),
"Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
),
"Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
),
"30 Jun 2023 18:45:01 +0300": datetime.datetime(
2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
),
"22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
),
"Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
),
}
for strptime, expected_datetime in str_to_dt.items():
assert time_str_to_utc(strptime) == expected_datetime
def _thread_with_date(date_header: str | None) -> dict[str, Any]:
    """Load the fixture thread and replace (or strip, if None) its Date header.

    Returns a freshly-parsed thread dict each call, so callers may mutate the
    result freely without affecting other tests.
    """
    json_path = os.path.join(os.path.dirname(__file__), "thread.json")
    with open(json_path, "r") as f:
        thread = cast(dict[str, Any], json.load(f))
    # json.load already builds a brand-new object graph on every call, so the
    # previous defensive deepcopy was redundant and has been removed.
    for message in thread["messages"]:
        headers: list[dict[str, str]] = message["payload"]["headers"]
        if date_header is None:
            # Strip the Date header entirely.
            message["payload"]["headers"] = [
                h for h in headers if h.get("name") != "Date"
            ]
            continue
        # Replace an existing Date header in place; append one if absent.
        for header in headers:
            if header.get("name") == "Date":
                header["value"] = date_header
                break
        else:
            headers.append({"name": "Date", "value": date_header})
    return thread
def test_thread_to_document_skips_unparseable_dates() -> None:
    """Threads whose Date header cannot be parsed still become Documents,
    just without a doc_updated_at timestamp."""
    unparseable_dates = [
        "Wed, 33 Sep 2007 13:42:59 +0100",
        "Thu, 11 Oct 2007 31:50:55 +0900",
        "total garbage not even close to a date",
    ]
    for bad_date in unparseable_dates:
        doc = thread_to_document(_thread_with_date(bad_date), "admin@example.com")
        assert isinstance(doc, Document), f"failed for {bad_date!r}"
        assert doc.doc_updated_at is None
        assert doc.id == "192edefb315737c3"
def test_gmail_checkpoint_progression() -> None:

View File

@@ -0,0 +1,641 @@
import time
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.gong.connector import GongConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
def _make_transcript(call_id: str) -> dict[str, Any]:
return {
"callId": call_id,
"transcript": [
{
"speakerId": "speaker1",
"sentences": [{"text": "Hello world"}],
}
],
}
def _make_call_detail(call_id: str, title: str) -> dict[str, Any]:
return {
"metaData": {
"id": call_id,
"started": "2026-01-15T10:00:00Z",
"title": title,
"purpose": "Test call",
"url": f"https://app.gong.io/call?id={call_id}",
"system": "test-system",
},
"parties": [
{
"speakerId": "speaker1",
"name": "Alice",
"emailAddress": "alice@test.com",
}
],
}
@pytest.fixture
def connector() -> GongConnector:
    """A GongConnector with dummy credentials loaded."""
    gong = GongConnector()
    credentials = {
        "gong_access_key": "test-key",
        "gong_access_key_secret": "test-secret",
    }
    gong.load_credentials(credentials)
    return gong
class TestGongConnectorCheckpoint:
def test_build_dummy_checkpoint(self, connector: GongConnector) -> None:
checkpoint = connector.build_dummy_checkpoint()
assert checkpoint.has_more is True
assert checkpoint.workspace_ids is None
assert checkpoint.workspace_index == 0
assert checkpoint.cursor is None
def test_validate_checkpoint_json(self, connector: GongConnector) -> None:
original = GongConnectorCheckpoint(
has_more=True,
workspace_ids=["ws1", None],
workspace_index=1,
cursor="abc123",
pending_transcripts={"call1": _make_transcript("call1")},
pending_call_details_attempts=2,
pending_retry_after=1234567890.5,
)
json_str = original.model_dump_json()
restored = connector.validate_checkpoint_json(json_str)
assert restored == original
@patch.object(GongConnector, "_throttled_request")
def test_first_call_resolves_workspaces(
self,
mock_request: MagicMock,
connector: GongConnector,
) -> None:
"""First checkpoint call should resolve workspaces and return without fetching."""
# No workspaces configured — should resolve to [None]
checkpoint = connector.build_dummy_checkpoint()
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
# Should return immediately (no yields)
with pytest.raises(StopIteration) as exc_info:
next(generator)
new_checkpoint = exc_info.value.value
assert new_checkpoint.workspace_ids == [None]
assert new_checkpoint.has_more is True
assert new_checkpoint.workspace_index == 0
# No API calls should have been made for workspace resolution
# when no workspaces are configured
mock_request.assert_not_called()
@patch.object(GongConnector, "_throttled_request")
def test_single_page_no_cursor(
self,
mock_request: MagicMock,
connector: GongConnector,
) -> None:
"""Single page of transcripts with no pagination cursor."""
transcript_response = MagicMock()
transcript_response.status_code = 200
transcript_response.json.return_value = {
"callTranscripts": [_make_transcript("call1")],
"records": {},
}
details_response = MagicMock()
details_response.status_code = 200
details_response.json.return_value = {
"calls": [_make_call_detail("call1", "Test Call")]
}
mock_request.side_effect = [transcript_response, details_response]
# Start from a checkpoint that already has workspaces resolved
checkpoint = GongConnectorCheckpoint(
has_more=True,
workspace_ids=[None],
workspace_index=0,
)
docs: list[Document] = []
failures: list[ConnectorFailure] = []
generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
try:
while True:
item = next(generator)
if isinstance(item, Document):
docs.append(item)
elif isinstance(item, ConnectorFailure):
failures.append(item)
except StopIteration as e:
checkpoint = e.value
assert len(docs) == 1
assert docs[0].semantic_identifier == "Test Call"
assert len(failures) == 0
assert checkpoint.has_more is False
assert checkpoint.workspace_index == 1
@patch.object(GongConnector, "_throttled_request")
def test_multi_page_with_cursor(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """Two pages of transcripts — cursor advances between checkpoint calls."""
    # Page 1: the "records" block carries a cursor, signalling another page.
    page1_response = MagicMock()
    page1_response.status_code = 200
    page1_response.json.return_value = {
        "callTranscripts": [_make_transcript("call1")],
        "records": {"cursor": "page2cursor"},
    }
    details1_response = MagicMock()
    details1_response.status_code = 200
    details1_response.json.return_value = {
        "calls": [_make_call_detail("call1", "Call One")]
    }
    # Page 2: no cursor (done)
    page2_response = MagicMock()
    page2_response.status_code = 200
    page2_response.json.return_value = {
        "callTranscripts": [_make_transcript("call2")],
        "records": {},
    }
    details2_response = MagicMock()
    details2_response.status_code = 200
    details2_response.json.return_value = {
        "calls": [_make_call_detail("call2", "Call Two")]
    }
    # Strict consumption order: transcripts then details, once per invocation.
    mock_request.side_effect = [
        page1_response,
        details1_response,
        page2_response,
        details2_response,
    ]
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )
    all_docs: list[Document] = []
    # First checkpoint call — page 1
    generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
    try:
        while True:
            item = next(generator)
            if isinstance(item, Document):
                all_docs.append(item)
    except StopIteration as e:
        # Generator return value is the updated checkpoint.
        checkpoint = e.value
    assert len(all_docs) == 1
    # Cursor is persisted so the next invocation resumes at page 2.
    assert checkpoint.cursor == "page2cursor"
    assert checkpoint.has_more is True
    # Second checkpoint call — page 2
    generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
    try:
        while True:
            item = next(generator)
            if isinstance(item, Document):
                all_docs.append(item)
    except StopIteration as e:
        checkpoint = e.value
    assert len(all_docs) == 2
    assert all_docs[0].semantic_identifier == "Call One"
    assert all_docs[1].semantic_identifier == "Call Two"
    assert checkpoint.has_more is False
@patch.object(GongConnector, "_throttled_request")
def test_missing_call_details_yields_failure(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """Missing call details persist across checkpoint invocations and
    eventually yield ConnectorFailure once MAX_CALL_DETAILS_ATTEMPTS is hit.

    No in-call sleep — retries happen on subsequent invocations, gated by
    the wall-clock retry-after deadline on the checkpoint.
    """
    transcript_response = MagicMock()
    transcript_response.status_code = 200
    transcript_response.json.return_value = {
        "callTranscripts": [_make_transcript("call1")],
        "records": {},
    }
    # Return empty call details every time (simulating the race condition)
    empty_details = MagicMock()
    empty_details.status_code = 200
    empty_details.json.return_value = {"calls": []}
    # One transcript fetch followed by one empty details response per attempt.
    mock_request.side_effect = [transcript_response] + [
        empty_details
    ] * GongConnector.MAX_CALL_DETAILS_ATTEMPTS
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )
    failures: list[ConnectorFailure] = []
    docs: list[Document] = []
    # Jump the clock past any retry deadline on each invocation so we
    # exercise the retry path without real sleeping. The test for the
    # backoff-gate itself lives in test_backoff_gate_prevents_retry_too_soon.
    fake_now = [1_000_000.0]

    def _advance_clock() -> float:
        # Every time.time() call inside the connector advances by 10k seconds.
        fake_now[0] += 10_000.0
        return fake_now[0]

    # Safety cap: a regression that never flips has_more can't loop forever.
    invocation_cap = GongConnector.MAX_CALL_DETAILS_ATTEMPTS + 5
    with patch(
        "onyx.connectors.gong.connector.time.time", side_effect=_advance_clock
    ):
        for _ in range(invocation_cap):
            if not checkpoint.has_more:
                break
            generator = connector.load_from_checkpoint(0, fake_now[0], checkpoint)
            try:
                while True:
                    item = next(generator)
                    if isinstance(item, ConnectorFailure):
                        failures.append(item)
                    elif isinstance(item, Document):
                        docs.append(item)
            except StopIteration as e:
                checkpoint = e.value
    assert len(docs) == 0
    assert len(failures) == 1
    assert failures[0].failed_document is not None
    assert failures[0].failed_document.document_id == "call1"
    # Pending-retry state must be fully cleared once the failure is emitted.
    assert checkpoint.has_more is False
    assert checkpoint.pending_transcripts == {}
    assert checkpoint.pending_call_details_attempts == 0
    assert checkpoint.pending_retry_after is None
    assert mock_request.call_count == 1 + GongConnector.MAX_CALL_DETAILS_ATTEMPTS
@patch.object(GongConnector, "_throttled_request")
def test_multi_workspace_iteration(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """Checkpoint iterates through multiple workspaces, one per invocation."""
    # Workspace 1: one call
    ws1_transcript = MagicMock()
    ws1_transcript.status_code = 200
    ws1_transcript.json.return_value = {
        "callTranscripts": [_make_transcript("call_ws1")],
        "records": {},
    }
    ws1_details = MagicMock()
    ws1_details.status_code = 200
    ws1_details.json.return_value = {
        "calls": [_make_call_detail("call_ws1", "WS1 Call")]
    }
    # Workspace 2: one call
    ws2_transcript = MagicMock()
    ws2_transcript.status_code = 200
    ws2_transcript.json.return_value = {
        "callTranscripts": [_make_transcript("call_ws2")],
        "records": {},
    }
    ws2_details = MagicMock()
    ws2_details.status_code = 200
    ws2_details.json.return_value = {
        "calls": [_make_call_detail("call_ws2", "WS2 Call")]
    }
    mock_request.side_effect = [
        ws1_transcript,
        ws1_details,
        ws2_transcript,
        ws2_details,
    ]
    # Two workspaces already resolved onto the checkpoint.
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=["ws1_id", "ws2_id"],
        workspace_index=0,
    )
    all_docs: list[Document] = []
    # Checkpoint call 1 — workspace 1
    generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
    try:
        while True:
            item = next(generator)
            if isinstance(item, Document):
                all_docs.append(item)
    except StopIteration as e:
        checkpoint = e.value
    # Workspace pointer advanced; more work remains for workspace 2.
    assert checkpoint.workspace_index == 1
    assert checkpoint.has_more is True
    # Checkpoint call 2 — workspace 2
    generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
    try:
        while True:
            item = next(generator)
            if isinstance(item, Document):
                all_docs.append(item)
    except StopIteration as e:
        checkpoint = e.value
    assert len(all_docs) == 2
    assert all_docs[0].semantic_identifier == "WS1 Call"
    assert all_docs[1].semantic_identifier == "WS2 Call"
    assert checkpoint.has_more is False
    assert checkpoint.workspace_index == 2
@patch.object(GongConnector, "_throttled_request")
def test_empty_workspace_404(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """A 404 from the transcript API means the workspace has no calls —
    it is treated as exhausted rather than as an error."""
    not_found = MagicMock()
    not_found.status_code = 404
    mock_request.return_value = not_found

    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )

    generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
    while True:
        try:
            next(generator)
        except StopIteration as stop:
            checkpoint = stop.value
            break

    # The lone workspace is done and no further work remains.
    assert checkpoint.has_more is False
    assert checkpoint.workspace_index == 1
@patch.object(GongConnector, "_throttled_request")
def test_partial_details_defers_and_resolves_next_invocation(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """A transcript whose call details are missing gets stashed into
    pending_transcripts and resolves on a later checkpoint invocation.
    Resolved docs are yielded in the order they become available."""
    transcript_response = MagicMock()
    transcript_response.status_code = 200
    transcript_response.json.return_value = {
        "callTranscripts": [
            _make_transcript("call1"),
            _make_transcript("call2"),
        ],
        "records": {},
    }
    # First fetch: returns call1 but not call2
    partial_details = MagicMock()
    partial_details.status_code = 200
    partial_details.json.return_value = {
        "calls": [_make_call_detail("call1", "Call One")]
    }
    # Second fetch (next invocation): returns call2
    missing_details = MagicMock()
    missing_details.status_code = 200
    missing_details.json.return_value = {
        "calls": [_make_call_detail("call2", "Call Two")]
    }
    mock_request.side_effect = [
        transcript_response,
        partial_details,
        missing_details,
    ]
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )
    docs: list[Document] = []
    # Advance the patched clock past the retry-after deadline on every
    # time.time() call so invocation 2 is allowed to retry immediately.
    fake_now = [1_000_000.0]

    def _advance_clock() -> float:
        fake_now[0] += 10_000.0
        return fake_now[0]

    with patch(
        "onyx.connectors.gong.connector.time.time", side_effect=_advance_clock
    ):
        # Invocation 1: fetches page + details, yields call1, stashes call2
        generator = connector.load_from_checkpoint(0, fake_now[0], checkpoint)
        try:
            while True:
                item = next(generator)
                if isinstance(item, Document):
                    docs.append(item)
        except StopIteration as e:
            checkpoint = e.value
        assert len(docs) == 1
        assert docs[0].semantic_identifier == "Call One"
        # call2 deferred with one attempt recorded and a retry deadline set.
        assert "call2" in checkpoint.pending_transcripts
        assert checkpoint.pending_call_details_attempts == 1
        assert checkpoint.pending_retry_after is not None
        assert checkpoint.has_more is True
        # Invocation 2: retries missing (only call2), yields it, clears pending
        generator = connector.load_from_checkpoint(0, fake_now[0], checkpoint)
        try:
            while True:
                item = next(generator)
                if isinstance(item, Document):
                    docs.append(item)
        except StopIteration as e:
            checkpoint = e.value
    assert len(docs) == 2
    assert docs[1].semantic_identifier == "Call Two"
    assert checkpoint.pending_transcripts == {}
    assert checkpoint.pending_call_details_attempts == 0
    assert checkpoint.pending_retry_after is None
    # Verify: 3 API calls total (1 transcript + 1 full details + 1 retry for missing only)
    assert mock_request.call_count == 3
    # The retry call should only request call2, not both
    retry_call_body = mock_request.call_args_list[2][1]["json"]
    assert retry_call_body["filter"]["callIds"] == ["call2"]
@patch.object(GongConnector, "_throttled_request")
def test_backoff_gate_prevents_retry_too_soon(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """If the retry-after deadline hasn't elapsed, _resolve_pending must
    NOT issue a /v2/calls/extensive request. Prevents burning through
    MAX_CALL_DETAILS_ATTEMPTS when workers re-invoke tightly.
    """
    pending_transcript = _make_transcript("call1")
    fixed_now = 1_000_000.0
    # Deadline is 30s in the future from fixed_now
    retry_after = fixed_now + 30
    # Checkpoint pre-seeded with a deferred transcript mid-backoff.
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
        pending_transcripts={"call1": pending_transcript},
        pending_call_details_attempts=1,
        pending_retry_after=retry_after,
    )
    # Freeze the connector's clock before the deadline for the whole run.
    with patch("onyx.connectors.gong.connector.time.time", return_value=fixed_now):
        generator = connector.load_from_checkpoint(0, fixed_now, checkpoint)
        try:
            while True:
                next(generator)
        except StopIteration as e:
            checkpoint = e.value
    # No API calls should have been made — we were inside the backoff window
    mock_request.assert_not_called()
    # Pending state preserved for later retry
    assert "call1" in checkpoint.pending_transcripts
    assert checkpoint.pending_call_details_attempts == 1
    assert checkpoint.pending_retry_after == retry_after
    assert checkpoint.has_more is True
@patch.object(GongConnector, "_throttled_request")
def test_pending_retry_does_not_block_on_time_sleep(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """Pending-transcript retry must never call time.sleep() with a
    non-trivial delay — spacing between retries is enforced via the
    wall-clock retry-after deadline stored on the checkpoint, not by
    blocking inside load_from_checkpoint.
    """
    transcript_response = MagicMock()
    transcript_response.status_code = 200
    transcript_response.json.return_value = {
        "callTranscripts": [_make_transcript("call1")],
        "records": {},
    }
    # Details stay empty forever so every invocation takes the retry path.
    empty_details = MagicMock()
    empty_details.status_code = 200
    empty_details.json.return_value = {"calls": []}
    mock_request.side_effect = [transcript_response] + [
        empty_details
    ] * GongConnector.MAX_CALL_DETAILS_ATTEMPTS
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
    )
    # Advance the patched clock by 10k seconds per time.time() call so the
    # retry-after gate never blocks an invocation.
    fake_now = [1_000_000.0]

    def _advance_clock() -> float:
        fake_now[0] += 10_000.0
        return fake_now[0]

    with (
        patch("onyx.connectors.gong.connector.time.sleep") as mock_sleep,
        patch(
            "onyx.connectors.gong.connector.time.time", side_effect=_advance_clock
        ),
    ):
        # Cap invocations so a regression can't spin forever.
        invocation_cap = GongConnector.MAX_CALL_DETAILS_ATTEMPTS + 5
        for _ in range(invocation_cap):
            if not checkpoint.has_more:
                break
            generator = connector.load_from_checkpoint(0, fake_now[0], checkpoint)
            try:
                while True:
                    next(generator)
            except StopIteration as e:
                checkpoint = e.value
    # The only legitimate sleep is the sub-second throttle in
    # _throttled_request (<= MIN_REQUEST_INTERVAL). Assert we never
    # sleep for anything close to the per-retry backoff delays.
    for call in mock_sleep.call_args_list:
        delay_arg = call.args[0] if call.args else 0
        assert delay_arg <= GongConnector.MIN_REQUEST_INTERVAL
@patch.object(GongConnector, "_throttled_request")
def test_expired_cursor_restarts_workspace(
    self,
    mock_request: MagicMock,
    connector: GongConnector,
) -> None:
    """An expired pagination cursor resets the checkpoint so the next
    invocation restarts the same workspace from scratch."""
    expired = MagicMock()
    expired.status_code = 400
    expired.ok = False
    expired.text = '{"requestId":"abc","errors":["cursor has expired"]}'
    mock_request.return_value = expired

    # Checkpoint mid-pagination holding a cursor that has since expired.
    checkpoint = GongConnectorCheckpoint(
        has_more=True,
        workspace_ids=[None],
        workspace_index=0,
        cursor="stale-cursor",
    )

    emitted: list[Document] = []
    generator = connector.load_from_checkpoint(0, time.time(), checkpoint)
    while True:
        try:
            item = next(generator)
        except StopIteration as stop:
            checkpoint = stop.value
            break
        if isinstance(item, Document):
            emitted.append(item)

    assert len(emitted) == 0
    # Cursor cleared, same workspace index, still more work to do.
    assert checkpoint.cursor is None
    assert checkpoint.workspace_index == 0
    assert checkpoint.has_more is True

View File

@@ -12,6 +12,10 @@ dependency on pypdf internals (pypdf.generic).
from io import BytesIO
from pathlib import Path
import pytest
from onyx.file_processing import extract_file_text
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
from onyx.file_processing.extract_file_text import pdf_to_text
from onyx.file_processing.extract_file_text import read_pdf_file
from onyx.file_processing.password_validation import is_pdf_protected
@@ -96,6 +100,80 @@ class TestReadPdfFile:
# Returned list is empty when callback is used
assert images == []
def test_image_cap_skips_images_above_limit(
    self, monkeypatch: pytest.MonkeyPatch
) -> None:
    """With the embedded-image cap forced to 0, no images are extracted.

    The cap protects the user-file-processing worker from OOMing on PDFs
    with thousands of embedded images; the fixture has one image, so a
    cap of 0 must drop it.
    """
    monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
    result = read_pdf_file(_load("with_image.pdf"), extract_images=True)
    extracted_images = result[2]
    assert extracted_images == []
def test_image_cap_at_limit_extracts_up_to_cap(
    self, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A cap at or above the image count behaves like the uncapped path."""
    monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 100)
    result = read_pdf_file(_load("with_image.pdf"), extract_images=True)
    # The single embedded image in the fixture comes through untouched.
    assert len(result[2]) == 1
def test_image_cap_with_callback_stops_streaming_at_limit(
    self, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The cap also short-circuits the streaming image_callback path."""
    monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
    seen: list[tuple[bytes, str]] = []

    def record(data: bytes, name: str) -> None:
        seen.append((data, name))

    read_pdf_file(
        _load("with_image.pdf"), extract_images=True, image_callback=record
    )
    # Nothing may be streamed to the callback when the cap is exceeded.
    assert seen == []
# ── count_pdf_embedded_images ────────────────────────────────────────────
class TestCountPdfEmbeddedImages:
    """Behavior of count_pdf_embedded_images across normal, capped,
    image-free, invalid, and encrypted fixtures."""

    def test_returns_count_for_normal_pdf(self) -> None:
        assert count_pdf_embedded_images(_load("with_image.pdf"), cap=10) == 1

    def test_short_circuits_above_cap(self) -> None:
        # with_image.pdf has 1 image. cap=0 means "anything > 0 is over cap" —
        # function returns on first increment as the over-cap sentinel.
        assert count_pdf_embedded_images(_load("with_image.pdf"), cap=0) == 1

    def test_returns_zero_for_pdf_without_images(self) -> None:
        assert count_pdf_embedded_images(_load("simple.pdf"), cap=10) == 0

    def test_returns_zero_for_invalid_pdf(self) -> None:
        # Garbage bytes must not raise — the helper returns 0 instead.
        assert count_pdf_embedded_images(BytesIO(b"not a pdf"), cap=10) == 0

    def test_returns_zero_for_password_locked_pdf(self) -> None:
        # encrypted.pdf has an open password; we can't inspect without it, so
        # the helper returns 0 — callers rely on the password-protected check
        # that runs earlier in the upload pipeline.
        assert count_pdf_embedded_images(_load("encrypted.pdf"), cap=10) == 0

    def test_inspects_owner_password_only_pdf(self) -> None:
        # owner_protected.pdf is encrypted but has no open password. It should
        # decrypt with an empty string and count images normally. The fixture
        # has zero images, so 0 is a real count (not the "bail on encrypted"
        # path).
        assert count_pdf_embedded_images(_load("owner_protected.pdf"), cap=10) == 0

    def test_preserves_file_position(self) -> None:
        # Counting must not disturb the stream position of the caller's file.
        pdf = _load("with_image.pdf")
        pdf.seek(42)
        count_pdf_embedded_images(pdf, cap=10)
        assert pdf.tell() == 42
# ── pdf_to_text ──────────────────────────────────────────────────────────

View File

@@ -29,6 +29,7 @@ from onyx.llm.utils import get_max_input_tokens
# Claude Opus model IDs on Vertex AI that, per this constant's name, reject
# the output-config block. NOTE(review): the consuming code is outside this
# view — confirm the exact request field these models reject before editing.
VERTEX_OPUS_MODELS_REJECTING_OUTPUT_CONFIG = [
    "claude-opus-4-5@20251101",
    "claude-opus-4-6",
    "claude-opus-4-7",
]

View File

@@ -100,6 +100,39 @@ class TestGenerateOllamaDisplayName:
result = generate_ollama_display_name("llama3.3:70b")
assert "3.3" in result or "3 3" in result # Either format is acceptable
def test_non_size_tag_shown(self) -> None:
    """Non-size tags such as 'e4b' must appear in the display name."""
    display = generate_ollama_display_name("gemma4:e4b")
    for fragment in ("Gemma", "4", "E4B"):
        assert fragment in display
def test_size_with_cloud_modifier(self) -> None:
    """A size tag combined with a cloud modifier keeps both parts."""
    display = generate_ollama_display_name("deepseek-v3.1:671b-cloud")
    for fragment in ("DeepSeek", "671B", "Cloud"):
        assert fragment in display
def test_size_with_multiple_modifiers(self) -> None:
    """A size tag with several modifiers surfaces each of them."""
    display = generate_ollama_display_name("qwen3-vl:235b-instruct-cloud")
    for fragment in ("Qwen", "235B", "Instruct", "Cloud"):
        assert fragment in display
def test_quantization_tag_shown(self) -> None:
    """Quantization tags (e.g. q4_0) are included in the display name."""
    display = generate_ollama_display_name("llama3:q4_0")
    for fragment in ("Llama", "Q4_0"):
        assert fragment in display
def test_cloud_only_tag(self) -> None:
    """A standalone cloud tag appears in the display name."""
    display = generate_ollama_display_name("glm-4.6:cloud")
    assert "CLOUD" in display
class TestStripOpenrouterVendorPrefix:
"""Tests for OpenRouter vendor prefix stripping."""

View File

@@ -95,9 +95,9 @@ class TestForceAddSearchToolGuard:
without a vector DB."""
import inspect
from onyx.tools.tool_constructor import construct_tools
from onyx.tools.tool_constructor import _construct_tools_impl
source = inspect.getsource(construct_tools)
source = inspect.getsource(_construct_tools_impl)
assert (
"DISABLE_VECTOR_DB" in source
), "construct_tools should reference DISABLE_VECTOR_DB to suppress force-adding SearchTool"

View File

@@ -0,0 +1,110 @@
"""Tests for ``ImageGenerationTool._resolve_reference_image_file_ids``.
The resolver turns the LLM's ``reference_image_file_ids`` argument into a
cleaned list of file IDs to hand to ``_load_reference_images``. It trusts
the LLM's picks — the LLM can only see file IDs that actually appear in
the conversation (via ``[attached image — file_id: <id>]`` tags on user
messages and the JSON returned by prior generate_image calls), so we
don't re-validate against an allow-list in the tool itself.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.tools.models import ToolCallException
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.images.image_generation_tool import (
REFERENCE_IMAGE_FILE_IDS_FIELD,
)
def _make_tool(
    supports_reference_images: bool = True,
    max_reference_images: int = 16,
) -> ImageGenerationTool:
    """Build an ImageGenerationTool backed by a stubbed provider so no
    credentials or network access are needed."""
    provider_stub = MagicMock()
    provider_stub.supports_reference_images = supports_reference_images
    provider_stub.max_reference_images = max_reference_images
    # Patch the provider factory for the duration of construction only.
    with patch(
        "onyx.tools.tool_implementations.images.image_generation_tool.get_image_generation_provider",
        return_value=provider_stub,
    ):
        return ImageGenerationTool(
            image_generation_credentials=MagicMock(),
            tool_id=1,
            emitter=MagicMock(),
            model="gpt-image-1",
            provider="openai",
        )
class TestResolveReferenceImageFileIds:
    """Cleaning steps of _resolve_reference_image_file_ids: passthrough,
    shape validation, dedupe, whitespace stripping, provider-capability
    check, and truncation to the provider maximum."""

    def test_unset_returns_empty_plain_generation(self) -> None:
        # No reference-image argument at all -> plain generation, empty list.
        tool = _make_tool()
        assert tool._resolve_reference_image_file_ids(llm_kwargs={}) == []

    def test_empty_list_is_treated_like_unset(self) -> None:
        tool = _make_tool()
        result = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: []},
        )
        assert result == []

    def test_passes_llm_supplied_ids_through(self) -> None:
        tool = _make_tool()
        result = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["upload-1", "gen-1"]},
        )
        # Order preserved — first entry is the primary edit source.
        assert result == ["upload-1", "gen-1"]

    def test_invalid_shape_raises(self) -> None:
        # A non-list value for the field is a malformed tool call.
        tool = _make_tool()
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(
                llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: "not-a-list"},
            )

    def test_non_string_element_raises(self) -> None:
        tool = _make_tool()
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(
                llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["ok", 123]},
            )

    def test_deduplicates_preserving_first_occurrence(self) -> None:
        tool = _make_tool()
        result = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1", "gen-2", "gen-1"]},
        )
        assert result == ["gen-1", "gen-2"]

    def test_strips_whitespace_and_skips_empty_strings(self) -> None:
        tool = _make_tool()
        result = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: [" gen-1 ", "", " "]},
        )
        assert result == ["gen-1"]

    def test_provider_without_reference_support_raises(self) -> None:
        # Supplying references to a provider that can't use them fails loudly.
        tool = _make_tool(supports_reference_images=False)
        with pytest.raises(ToolCallException):
            tool._resolve_reference_image_file_ids(
                llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1"]},
            )

    def test_truncates_to_provider_max_preserving_head(self) -> None:
        """When the LLM lists more images than the provider allows, keep the
        HEAD of the list (the primary edit source + earliest extras) rather
        than the tail, since the LLM put the most important one first."""
        tool = _make_tool(max_reference_images=2)
        result = tool._resolve_reference_image_file_ids(
            llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["a", "b", "c", "d"]},
        )
        assert result == ["a", "b"]

View File

@@ -1,10 +1,5 @@
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.server.query_and_chat.placement import Placement
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_runner import _extract_image_file_ids_from_tool_response_message
from onyx.tools.tool_runner import _extract_recent_generated_image_file_ids
from onyx.tools.tool_runner import _merge_tool_calls
@@ -312,62 +307,3 @@ class TestMergeToolCalls:
assert len(result) == 1
# String should be converted to list item
assert result[0].tool_args["queries"] == ["single_query", "q2"]
class TestImageHistoryExtraction:
    """Extraction of generated-image file IDs from tool-response messages
    and from chat history; only generate_image tool calls count."""

    def test_extracts_image_file_ids_from_json_response(self) -> None:
        # JSON list of {file_id, revised_prompt} objects -> ordered IDs.
        msg = '[{"file_id":"img-1","revised_prompt":"v1"},{"file_id":"img-2","revised_prompt":"v2"}]'
        assert _extract_image_file_ids_from_tool_response_message(msg) == [
            "img-1",
            "img-2",
        ]

    def test_extracts_recent_generated_image_ids_from_history(self) -> None:
        # An assistant generate_image call followed by its tool response.
        history = [
            ChatMessageSimple(
                message="",
                token_count=1,
                message_type=MessageType.ASSISTANT,
                tool_calls=[
                    ToolCallSimple(
                        tool_call_id="call_1",
                        tool_name="generate_image",
                        tool_arguments={"prompt": "test"},
                        token_count=1,
                    )
                ],
            ),
            ChatMessageSimple(
                message='[{"file_id":"img-1","revised_prompt":"r1"}]',
                token_count=1,
                message_type=MessageType.TOOL_CALL_RESPONSE,
                tool_call_id="call_1",
            ),
        ]
        assert _extract_recent_generated_image_file_ids(history) == ["img-1"]

    def test_ignores_non_image_tool_responses(self) -> None:
        # Same response shape, but the originating call is web_search, so
        # its payload must not be treated as generated images.
        history = [
            ChatMessageSimple(
                message="",
                token_count=1,
                message_type=MessageType.ASSISTANT,
                tool_calls=[
                    ToolCallSimple(
                        tool_call_id="call_1",
                        tool_name="web_search",
                        tool_arguments={"queries": ["q"]},
                        token_count=1,
                    )
                ],
            ),
            ChatMessageSimple(
                message='[{"file_id":"img-1","revised_prompt":"r1"}]',
                token_count=1,
                message_type=MessageType.TOOL_CALL_RESPONSE,
                tool_call_id="call_1",
            ),
        ]
        assert _extract_recent_generated_image_file_ids(history) == []

View File

@@ -1,15 +1,18 @@
"""Tests for generic Celery task lifecycle Prometheus metrics."""
import time
from collections.abc import Iterator
from unittest.mock import MagicMock
import pytest
from onyx.background.celery.apps.app_base import on_before_task_publish
from onyx.server.metrics.celery_task_metrics import _task_start_times
from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
from onyx.server.metrics.celery_task_metrics import TASK_COMPLETED
from onyx.server.metrics.celery_task_metrics import TASK_DURATION
from onyx.server.metrics.celery_task_metrics import TASK_QUEUE_WAIT
from onyx.server.metrics.celery_task_metrics import TASK_STARTED
from onyx.server.metrics.celery_task_metrics import TASKS_ACTIVE
@@ -22,11 +25,18 @@ def reset_metrics() -> Iterator[None]:
_task_start_times.clear()
def _make_task(name: str = "test_task", queue: str = "test_queue") -> MagicMock:
def _make_task(
name: str = "test_task",
queue: str = "test_queue",
enqueued_at: float | None = None,
) -> MagicMock:
task = MagicMock()
task.name = name
task.request = MagicMock()
task.request.delivery_info = {"routing_key": queue}
task.request.headers = (
{"enqueued_at": enqueued_at} if enqueued_at is not None else {}
)
return task
@@ -72,6 +82,35 @@ class TestCeleryTaskPrerun:
on_celery_task_prerun("task-1", task)
assert "task-1" in _task_start_times
def test_observes_queue_wait_when_enqueued_at_present(self) -> None:
    """A stamped enqueued_at header feeds the queue-wait histogram."""
    queued_at = time.time() - 30  # task waited ~30s in the queue
    task = _make_task(enqueued_at=queued_at)
    wait_metric = TASK_QUEUE_WAIT.labels(task_name="test_task", queue="test_queue")
    before = wait_metric._sum.get()
    on_celery_task_prerun("task-1", task)
    after = wait_metric._sum.get()
    assert after >= before + 30
def test_skips_queue_wait_when_enqueued_at_missing(self) -> None:
    """Without an enqueued_at header, no queue-wait observation is made."""
    task = _make_task()  # headers carry no enqueued_at stamp
    wait_metric = TASK_QUEUE_WAIT.labels(task_name="test_task", queue="test_queue")
    before = wait_metric._sum.get()
    on_celery_task_prerun("task-2", task)
    after = wait_metric._sum.get()
    assert after == before
class TestCeleryTaskPostrun:
def test_increments_completed_success(self) -> None:
@@ -151,3 +190,15 @@ class TestCeleryTaskPostrun:
task = _make_task()
on_celery_task_postrun("task-1", task, "SUCCESS")
# Should not raise
class TestBeforeTaskPublish:
    """on_before_task_publish stamps a wall-clock enqueue time into headers."""

    def test_stamps_enqueued_at_into_headers(self) -> None:
        before = time.time()
        headers: dict = {}
        on_before_task_publish(headers=headers)
        # The stamp must be a timestamp taken at (or after) publish time.
        assert "enqueued_at" in headers
        assert headers["enqueued_at"] >= before

    def test_noop_when_headers_is_none(self) -> None:
        # headers=None must be tolerated without raising.
        on_before_task_publish(headers=None)  # should not raise

View File

@@ -0,0 +1,204 @@
"""Tests for deletion-specific Prometheus metrics."""
import pytest
from onyx.server.metrics.deletion_metrics import DELETION_BLOCKED
from onyx.server.metrics.deletion_metrics import DELETION_COMPLETED
from onyx.server.metrics.deletion_metrics import DELETION_FENCE_RESET
from onyx.server.metrics.deletion_metrics import DELETION_STARTED
from onyx.server.metrics.deletion_metrics import DELETION_TASKSET_DURATION
from onyx.server.metrics.deletion_metrics import inc_deletion_blocked
from onyx.server.metrics.deletion_metrics import inc_deletion_completed
from onyx.server.metrics.deletion_metrics import inc_deletion_fence_reset
from onyx.server.metrics.deletion_metrics import inc_deletion_started
from onyx.server.metrics.deletion_metrics import observe_deletion_taskset_duration
class TestIncDeletionStarted:
    """inc_deletion_started bumps DELETION_STARTED for the given tenant only,
    and never propagates metric-layer exceptions."""

    def test_increments_counter(self) -> None:
        before = DELETION_STARTED.labels(tenant_id="t1")._value.get()
        inc_deletion_started("t1")
        after = DELETION_STARTED.labels(tenant_id="t1")._value.get()
        assert after == before + 1

    def test_labels_by_tenant(self) -> None:
        # Only the tenant passed to the helper is incremented.
        before_t1 = DELETION_STARTED.labels(tenant_id="t1")._value.get()
        before_t2 = DELETION_STARTED.labels(tenant_id="t2")._value.get()
        inc_deletion_started("t1")
        assert DELETION_STARTED.labels(tenant_id="t1")._value.get() == before_t1 + 1
        assert DELETION_STARTED.labels(tenant_id="t2")._value.get() == before_t2

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        # Replace .labels with a lambda that raises; the helper must swallow it.
        monkeypatch.setattr(
            DELETION_STARTED,
            "labels",
            lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
        )
        inc_deletion_started("t1")
class TestIncDeletionCompleted:
    """inc_deletion_completed bumps DELETION_COMPLETED for the exact
    (tenant_id, outcome) pair and never propagates metric-layer exceptions."""

    def test_increments_counter(self) -> None:
        before = DELETION_COMPLETED.labels(
            tenant_id="t1", outcome="success"
        )._value.get()
        inc_deletion_completed("t1", "success")
        after = DELETION_COMPLETED.labels(
            tenant_id="t1", outcome="success"
        )._value.get()
        assert after == before + 1

    def test_labels_by_outcome(self) -> None:
        # A success increment must leave the failure series untouched.
        before_success = DELETION_COMPLETED.labels(
            tenant_id="t1", outcome="success"
        )._value.get()
        before_failure = DELETION_COMPLETED.labels(
            tenant_id="t1", outcome="failure"
        )._value.get()
        inc_deletion_completed("t1", "success")
        assert (
            DELETION_COMPLETED.labels(tenant_id="t1", outcome="success")._value.get()
            == before_success + 1
        )
        assert (
            DELETION_COMPLETED.labels(tenant_id="t1", outcome="failure")._value.get()
            == before_failure
        )

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        # Replace .labels with a lambda that raises; the helper must swallow it.
        monkeypatch.setattr(
            DELETION_COMPLETED,
            "labels",
            lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
        )
        inc_deletion_completed("t1", "success")
class TestObserveDeletionTasksetDuration:
    """observe_deletion_taskset_duration records the given seconds into
    DELETION_TASKSET_DURATION under the exact (tenant_id, outcome) pair,
    and never propagates metric-layer exceptions."""

    def test_observes_duration(self) -> None:
        before = DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="success"
        )._sum.get()
        observe_deletion_taskset_duration("t1", "success", 120.0)
        after = DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="success"
        )._sum.get()
        # Histogram sums are floats — compare with approx.
        assert after == pytest.approx(before + 120.0)

    def test_labels_by_tenant(self) -> None:
        before_t1 = DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="success"
        )._sum.get()
        before_t2 = DELETION_TASKSET_DURATION.labels(
            tenant_id="t2", outcome="success"
        )._sum.get()
        observe_deletion_taskset_duration("t1", "success", 60.0)
        assert DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="success"
        )._sum.get() == pytest.approx(before_t1 + 60.0)
        assert DELETION_TASKSET_DURATION.labels(
            tenant_id="t2", outcome="success"
        )._sum.get() == pytest.approx(before_t2)

    def test_labels_by_outcome(self) -> None:
        # A failure observation must leave the success series untouched.
        before_success = DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="success"
        )._sum.get()
        before_failure = DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="failure"
        )._sum.get()
        observe_deletion_taskset_duration("t1", "failure", 45.0)
        assert DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="success"
        )._sum.get() == pytest.approx(before_success)
        assert DELETION_TASKSET_DURATION.labels(
            tenant_id="t1", outcome="failure"
        )._sum.get() == pytest.approx(before_failure + 45.0)

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        # Replace .labels with a lambda that raises; the helper must swallow it.
        monkeypatch.setattr(
            DELETION_TASKSET_DURATION,
            "labels",
            lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
        )
        observe_deletion_taskset_duration("t1", "success", 10.0)
class TestIncDeletionBlocked:
    """inc_deletion_blocked bumps DELETION_BLOCKED for the exact
    (tenant_id, blocker) pair and never propagates metric-layer exceptions."""

    def test_increments_counter(self) -> None:
        before = DELETION_BLOCKED.labels(
            tenant_id="t1", blocker="indexing"
        )._value.get()
        inc_deletion_blocked("t1", "indexing")
        after = DELETION_BLOCKED.labels(tenant_id="t1", blocker="indexing")._value.get()
        assert after == before + 1

    def test_labels_by_blocker(self) -> None:
        # An "indexing" increment must leave the "pruning" series untouched.
        before_idx = DELETION_BLOCKED.labels(
            tenant_id="t1", blocker="indexing"
        )._value.get()
        before_prune = DELETION_BLOCKED.labels(
            tenant_id="t1", blocker="pruning"
        )._value.get()
        inc_deletion_blocked("t1", "indexing")
        assert (
            DELETION_BLOCKED.labels(tenant_id="t1", blocker="indexing")._value.get()
            == before_idx + 1
        )
        assert (
            DELETION_BLOCKED.labels(tenant_id="t1", blocker="pruning")._value.get()
            == before_prune
        )

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        # Replace .labels with a lambda that raises; the helper must swallow it.
        monkeypatch.setattr(
            DELETION_BLOCKED,
            "labels",
            lambda **_: (_ for _ in ()).throw(RuntimeError("boom")),
        )
        inc_deletion_blocked("t1", "indexing")
class TestIncDeletionFenceReset:
    """Tests for inc_deletion_fence_reset(): increments a per-tenant counter."""

    @staticmethod
    def _count(tenant: str) -> float:
        # Read the current counter value for one tenant series.
        return DELETION_FENCE_RESET.labels(tenant_id=tenant)._value.get()

    def test_increments_counter(self) -> None:
        start = self._count("t1")
        inc_deletion_fence_reset("t1")
        assert self._count("t1") == start + 1

    def test_labels_by_tenant(self) -> None:
        start_t1 = self._count("t1")
        start_t2 = self._count("t2")
        inc_deletion_fence_reset("t1")
        # Only t1 moves; t2 stays untouched.
        assert self._count("t1") == start_t1 + 1
        assert self._count("t2") == start_t2

    def test_does_not_raise_on_exception(self, monkeypatch: pytest.MonkeyPatch) -> None:
        def _boom(**_: object) -> None:
            raise RuntimeError("boom")

        # A failing metrics client must never crash the caller.
        monkeypatch.setattr(DELETION_FENCE_RESET, "labels", _boom)
        inc_deletion_fence_reset("t1")

View File

@@ -1,16 +1,11 @@
"""Tests for indexing pipeline Prometheus collectors."""
from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
@@ -18,7 +13,7 @@ from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
def _mock_broker_client() -> Iterator[None]:
"""Patch celery_get_broker_client for all collector tests."""
with patch(
"onyx.background.celery.celery_redis.celery_get_broker_client",
"onyx.server.metrics.indexing_pipeline.celery_get_broker_client",
return_value=MagicMock(),
):
yield
@@ -137,212 +132,3 @@ class TestQueueDepthCollector:
stale_result = collector.collect()
assert stale_result is good_result
class TestIndexAttemptCollector:
    """Tests for IndexAttemptCollector, which exposes active index attempts
    as the ``onyx_index_attempts_active`` gauge family by querying the
    database at scrape time."""

    def test_returns_empty_when_not_configured(self) -> None:
        # Before configure() is called, collect() must emit no families.
        collector = IndexAttemptCollector()
        assert collector.collect() == []

    def test_returns_empty_describe(self) -> None:
        # describe() returning [] keeps the registry from collecting eagerly.
        collector = IndexAttemptCollector()
        assert collector.describe() == []

    # NOTE: @patch decorators bind bottom-up, so the innermost patch
    # (get_session_with_current_tenant) maps to the first mock argument.
    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_collects_index_attempts(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        # cache_ttl=0 disables the TTL cache so collect() queries fresh.
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()
        mock_get_tenants.return_value = ["public"]
        mock_session = MagicMock()
        # Emulate the session factory's context-manager protocol.
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
        from onyx.db.enums import IndexingStatus

        # Row shape (inferred from the labels asserted below — TODO confirm
        # against the collector's query):
        # (status, source, cc_pair_id, connector_name, count)
        mock_row = (
            IndexingStatus.IN_PROGRESS,
            MagicMock(value="web"),
            81,
            "Table Tennis Blade Guide",
            2,
        )
        # Stub the exact query chain the collector issues.
        mock_session.query.return_value.join.return_value.join.return_value.filter.return_value.group_by.return_value.all.return_value = [
            mock_row
        ]
        families = collector.collect()
        assert len(families) == 1
        assert families[0].name == "onyx_index_attempts_active"
        assert len(families[0].samples) == 1
        sample = families[0].samples[0]
        # cc_pair_id is stringified for use as a label value.
        assert sample.labels == {
            "status": "in_progress",
            "source": "web",
            "tenant_id": "public",
            "connector_name": "Table Tennis Blade Guide",
            "cc_pair_id": "81",
        }
        assert sample.value == 2

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_handles_db_error_gracefully(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()
        # Simulate a database outage at the tenant-enumeration step.
        mock_get_tenants.side_effect = Exception("DB down")
        families = collector.collect()
        # No stale cache, so returns empty
        assert families == []

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_skips_none_tenant_ids(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = IndexAttemptCollector(cache_ttl=0)
        collector.configure()
        # A None tenant id must be skipped rather than queried.
        mock_get_tenants.return_value = [None]
        families = collector.collect()
        assert len(families) == 1  # Returns the gauge family, just with no samples
        assert len(families[0].samples) == 0
class TestConnectorHealthCollector:
    """Tests for ConnectorHealthCollector, which derives several connector
    health gauge/counter families from the database at scrape time."""

    def test_returns_empty_when_not_configured(self) -> None:
        # Before configure() is called, collect() must emit no families.
        collector = ConnectorHealthCollector()
        assert collector.collect() == []

    def test_returns_empty_describe(self) -> None:
        # describe() returning [] keeps the registry from collecting eagerly.
        collector = ConnectorHealthCollector()
        assert collector.describe() == []

    # NOTE: @patch decorators bind bottom-up, so the innermost patch
    # (get_session_with_current_tenant) maps to the first mock argument.
    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_collects_connector_health(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        # cache_ttl=0 disables the TTL cache so collect() queries fresh.
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()
        mock_get_tenants.return_value = ["public"]
        mock_session = MagicMock()
        # Emulate the session factory's context-manager protocol.
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
        # Last success 2h ago -> staleness gauge should read ~7200s.
        now = datetime.now(tz=timezone.utc)
        last_success = now - timedelta(hours=2)
        mock_status = MagicMock(value="ACTIVE")
        mock_source = MagicMock(value="google_drive")
        # Row: (id, status, in_error, last_success, name, source)
        mock_row = (
            42,
            mock_status,
            True,  # in_repeated_error_state
            last_success,
            "My GDrive Connector",
            mock_source,
        )
        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
        # Mock the index attempt queries (error counts + docs counts)
        mock_session.query.return_value.filter.return_value.group_by.return_value.all.return_value = (
            []
        )
        families = collector.collect()
        # The collector emits exactly these six metric families.
        assert len(families) == 6
        names = {f.name for f in families}
        assert names == {
            "onyx_connector_last_success_age_seconds",
            "onyx_connector_in_error_state",
            "onyx_connectors_by_status",
            "onyx_connectors_in_error_total",
            "onyx_connector_docs_indexed",
            "onyx_connector_error_count",
        }
        staleness = next(
            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
        )
        assert len(staleness.samples) == 1
        # abs=5 tolerates wall-clock drift between setup and collect().
        assert staleness.samples[0].value == pytest.approx(7200, abs=5)
        error_state = next(
            f for f in families if f.name == "onyx_connector_in_error_state"
        )
        assert error_state.samples[0].value == 1.0
        by_status = next(f for f in families if f.name == "onyx_connectors_by_status")
        assert by_status.samples[0].labels == {
            "tenant_id": "public",
            "status": "ACTIVE",
        }
        assert by_status.samples[0].value == 1
        error_total = next(
            f for f in families if f.name == "onyx_connectors_in_error_total"
        )
        assert error_total.samples[0].value == 1

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    @patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
    def test_skips_staleness_when_no_last_success(
        self,
        mock_get_session: MagicMock,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()
        mock_get_tenants.return_value = ["public"]
        mock_session = MagicMock()
        mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
        mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
        mock_status = MagicMock(value="INITIAL_INDEXING")
        mock_source = MagicMock(value="slack")
        # Same row shape as above, but last_success is None.
        mock_row = (
            10,
            mock_status,
            False,
            None,  # no last_successful_index_time
            0,
            mock_source,
        )
        mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
        families = collector.collect()
        # Without a last success timestamp no staleness sample is emitted.
        staleness = next(
            f for f in families if f.name == "onyx_connector_last_success_age_seconds"
        )
        assert len(staleness.samples) == 0

    @patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
    def test_handles_db_error_gracefully(
        self,
        mock_get_tenants: MagicMock,
    ) -> None:
        collector = ConnectorHealthCollector(cache_ttl=0)
        collector.configure()
        # Simulate a database outage at the tenant-enumeration step.
        mock_get_tenants.side_effect = Exception("DB down")
        families = collector.collect()
        assert families == []

View File

@@ -129,12 +129,36 @@ class TestWorkerHealthCollector:
up = families[1]
assert up.name == "onyx_celery_worker_up"
assert len(up.samples) == 3
# Labels use short names (before @)
labels = {s.labels["worker"] for s in up.samples}
assert labels == {"primary", "docfetching", "monitoring"}
label_pairs = {
(s.labels["worker_type"], s.labels["hostname"]) for s in up.samples
}
assert label_pairs == {
("primary", "host1"),
("docfetching", "host1"),
("monitoring", "host1"),
}
for sample in up.samples:
assert sample.value == 1
def test_replicas_of_same_worker_type_are_distinct(self) -> None:
    """Regression: ``docprocessing@pod-1`` and ``docprocessing@pod-2`` must
    produce separate samples, not collapse into one duplicate-timestamp
    series.
    """
    monitor = WorkerHeartbeatMonitor(MagicMock())
    # Three replicas of the same worker type on different hosts.
    for pod in ("pod-1", "pod-2", "pod-3"):
        monitor._on_heartbeat({"hostname": f"docprocessing@{pod}"})
    collector = WorkerHealthCollector(cache_ttl=0)
    collector.set_monitor(monitor)
    up_family = collector.collect()[1]
    assert len(up_family.samples) == 3
    observed_hosts = {s.labels["hostname"] for s in up_family.samples}
    assert observed_hosts == {"pod-1", "pod-2", "pod-3"}
    assert all(s.labels["worker_type"] == "docprocessing" for s in up_family.samples)
def test_reports_dead_worker(self) -> None:
monitor = WorkerHeartbeatMonitor(MagicMock())
monitor._on_heartbeat({"hostname": "primary@host1"})
@@ -151,9 +175,9 @@ class TestWorkerHealthCollector:
assert active.samples[0].value == 1
up = families[1]
samples_by_name = {s.labels["worker"]: s.value for s in up.samples}
assert samples_by_name["primary"] == 1
assert samples_by_name["monitoring"] == 0
samples_by_type = {s.labels["worker_type"]: s.value for s in up.samples}
assert samples_by_type["primary"] == 1
assert samples_by_type["monitoring"] == 0
def test_empty_monitor_returns_zero(self) -> None:
monitor = WorkerHeartbeatMonitor(MagicMock())

View File

@@ -217,11 +217,23 @@ Enriches docfetching and docprocessing tasks with connector-level labels. Silent
| `onyx_indexing_task_completed_total` | Counter | `task_name`, `source`, `tenant_id`, `cc_pair_id`, `outcome` | Indexing tasks completed per connector |
| `onyx_indexing_task_duration_seconds` | Histogram | `task_name`, `source`, `tenant_id` | Indexing task duration by connector type |
`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string). The pull-based collectors on the monitoring worker include it since they have bounded cardinality (one series per connector).
`connector_name` is intentionally excluded from these per-task counters to avoid unbounded cardinality (it's a free-form user string).
### Connector Health Metrics (`onyx.server.metrics.connector_health_metrics`)
Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Scales to any number of tenants (no schema iteration). Unlike the per-task counters above, these include `connector_name` because their cardinality is bounded by the number of connectors (one series per connector), not by the number of task executions.
| Metric | Type | Labels | Description |
| ----------------------------------------------- | ------- | --------------------------------------------------------------- | ------------------------------------------------------------- |
| `onyx_index_attempt_transitions_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name`, `status` | Index attempt status transitions (in_progress, success, etc.) |
| `onyx_connector_in_error_state` | Gauge | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Whether connector is in repeated error state (1=yes, 0=no) |
| `onyx_connector_last_success_timestamp_seconds` | Gauge | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Unix timestamp of last successful indexing |
| `onyx_connector_docs_indexed_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Total documents indexed per connector (monotonic) |
| `onyx_connector_indexing_errors_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `connector_name` | Total failed index attempts per connector (monotonic) |
### Pull-Based Collectors (`onyx.server.metrics.indexing_pipeline`)
Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at scrape time with a 30-second TTL cache.
Registered only in the **Monitoring** worker. Collectors query Redis at scrape time with a 30-second TTL cache and a 120-second timeout to prevent the `/metrics` endpoint from hanging.
| Metric | Type | Labels | Description |
| ------------------------------------ | ----- | ------- | ----------------------------------- |
@@ -229,8 +241,6 @@ Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at
| `onyx_queue_unacked` | Gauge | `queue` | Unacknowledged messages per queue |
| `onyx_queue_oldest_task_age_seconds` | Gauge | `queue` | Age of the oldest task in the queue |
Plus additional connector health, index attempt, and worker heartbeat metrics — see `indexing_pipeline.py` for the full list.
### Adding Metrics to a Worker
Currently only the docfetching and docprocessing workers have push-based task metrics wired up. To add metrics to another worker (e.g. heavy, light, primary):

View File

@@ -12,7 +12,7 @@ dependencies = [
"cohere==5.6.1",
"fastapi==0.133.1",
"google-genai==1.52.0",
"litellm==1.81.6",
"litellm[google]==1.81.6",
"openai==2.14.0",
"pydantic==2.11.7",
"prometheus_client>=0.21.1",

229
uv.lock generated
View File

@@ -2115,6 +2115,12 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/d4/90197b416cb61cefd316964fd9e7bd8324bcbafabf40eef14a9f20b81974/google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c", size = 173706, upload-time = "2025-10-28T21:34:50.151Z" },
]
[package.optional-dependencies]
grpc = [
{ name = "grpcio" },
{ name = "grpcio-status" },
]
[[package]]
name = "google-api-python-client"
version = "2.86.0"
@@ -2172,6 +2178,124 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4a/07/8d9a8186e6768b55dfffeb57c719bc03770cf8a970a074616ae6f9e26a57/google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb", size = 18926, upload-time = "2023-02-07T20:53:18.837Z" },
]
[[package]]
name = "google-cloud-aiplatform"
version = "1.133.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "docstring-parser" },
{ name = "google-api-core", extra = ["grpc"] },
{ name = "google-auth" },
{ name = "google-cloud-bigquery" },
{ name = "google-cloud-resource-manager" },
{ name = "google-cloud-storage" },
{ name = "google-genai" },
{ name = "packaging" },
{ name = "proto-plus" },
{ name = "protobuf" },
{ name = "pydantic" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d4/be/31ce7fd658ddebafbe5583977ddee536b2bacc491ad10b5a067388aec66f/google_cloud_aiplatform-1.133.0.tar.gz", hash = "sha256:3a6540711956dd178daaab3c2c05db476e46d94ac25912b8cf4f59b00b058ae0", size = 9921309, upload-time = "2026-01-08T22:11:25.079Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/01/5b/ef74ff65aebb74eaba51078e33ddd897247ba0d1197fd5a7953126205519/google_cloud_aiplatform-1.133.0-py2.py3-none-any.whl", hash = "sha256:dfc81228e987ca10d1c32c7204e2131b3c8d6b7c8e0b4e23bf7c56816bc4c566", size = 8184595, upload-time = "2026-01-08T22:11:22.067Z" },
]
[[package]]
name = "google-cloud-bigquery"
version = "3.41.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-api-core", extra = ["grpc"] },
{ name = "google-auth" },
{ name = "google-cloud-core" },
{ name = "google-resumable-media" },
{ name = "packaging" },
{ name = "python-dateutil" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ce/13/6515c7aab55a4a0cf708ffd309fb9af5bab54c13e32dc22c5acd6497193c/google_cloud_bigquery-3.41.0.tar.gz", hash = "sha256:2217e488b47ed576360c9b2cc07d59d883a54b83167c0ef37f915c26b01a06fe", size = 513434, upload-time = "2026-03-30T22:50:55.347Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/40/33/1d3902efadef9194566d499d61507e1f038454e0b55499d2d7f8ab2a4fee/google_cloud_bigquery-3.41.0-py3-none-any.whl", hash = "sha256:2a5b5a737b401cbd824a6e5eac7554100b878668d908e6548836b5d8aaa4dcaa", size = 262343, upload-time = "2026-03-30T22:48:45.444Z" },
]
[[package]]
name = "google-cloud-core"
version = "2.5.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-api-core" },
{ name = "google-auth" },
]
sdist = { url = "https://files.pythonhosted.org/packages/dc/24/6ca08b0a03c7b0c620427503ab00353a4ae806b848b93bcea18b6b76fde6/google_cloud_core-2.5.1.tar.gz", hash = "sha256:3dc94bdec9d05a31d9f355045ed0f369fbc0d8c665076c734f065d729800f811", size = 36078, upload-time = "2026-03-30T22:50:08.057Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/73/d9/5bb050cb32826466aa9b25f79e2ca2879fe66cb76782d4ed798dd7506151/google_cloud_core-2.5.1-py3-none-any.whl", hash = "sha256:ea62cdf502c20e3e14be8a32c05ed02113d7bef454e40ff3fab6fe1ec9f1f4e7", size = 29452, upload-time = "2026-03-30T22:48:31.567Z" },
]
[[package]]
name = "google-cloud-resource-manager"
version = "1.17.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-api-core", extra = ["grpc"] },
{ name = "google-auth" },
{ name = "grpc-google-iam-v1" },
{ name = "grpcio" },
{ name = "proto-plus" },
{ name = "protobuf" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b2/1a/13060cabf553d52d151d2afc26b39561e82853380d499dd525a0d422d9f0/google_cloud_resource_manager-1.17.0.tar.gz", hash = "sha256:0f486b62e2c58ff992a3a50fa0f4a96eef7750aa6c971bb373398ccb91828660", size = 464971, upload-time = "2026-03-26T22:17:29.204Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/f7/661d7a9023e877a226b5683429c3662f75a29ef45cb1464cf39adb689218/google_cloud_resource_manager-1.17.0-py3-none-any.whl", hash = "sha256:e479baf4b014a57f298e01b8279e3290b032e3476d69c8e5e1427af8f82739a5", size = 404403, upload-time = "2026-03-26T22:15:26.57Z" },
]
[[package]]
name = "google-cloud-storage"
version = "3.10.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-api-core" },
{ name = "google-auth" },
{ name = "google-cloud-core" },
{ name = "google-crc32c" },
{ name = "google-resumable-media" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/4c/47/205eb8e9a1739b5345843e5a425775cbdc472cc38e7eda082ba5b8d02450/google_cloud_storage-3.10.1.tar.gz", hash = "sha256:97db9aa4460727982040edd2bd13ff3d5e2260b5331ad22895802da1fc2a5286", size = 17309950, upload-time = "2026-03-23T09:35:23.409Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ad/ff/ca9ab2417fa913d75aae38bf40bf856bb2749a604b2e0f701b37cfcd23cc/google_cloud_storage-3.10.1-py3-none-any.whl", hash = "sha256:a72f656759b7b99bda700f901adcb3425a828d4a29f911bc26b3ea79c5b1217f", size = 324453, upload-time = "2026-03-23T09:35:21.368Z" },
]
[[package]]
name = "google-crc32c"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5d/ef/21ccfaab3d5078d41efe8612e0ed0bfc9ce22475de074162a91a25f7980d/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8", size = 31298, upload-time = "2025-12-16T00:20:32.241Z" },
{ url = "https://files.pythonhosted.org/packages/c5/b8/f8413d3f4b676136e965e764ceedec904fe38ae8de0cdc52a12d8eb1096e/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7", size = 30872, upload-time = "2025-12-16T00:33:58.785Z" },
{ url = "https://files.pythonhosted.org/packages/f6/fd/33aa4ec62b290477181c55bb1c9302c9698c58c0ce9a6ab4874abc8b0d60/google_crc32c-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15", size = 33243, upload-time = "2025-12-16T00:40:21.46Z" },
{ url = "https://files.pythonhosted.org/packages/71/03/4820b3bd99c9653d1a5210cb32f9ba4da9681619b4d35b6a052432df4773/google_crc32c-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a", size = 33608, upload-time = "2025-12-16T00:40:22.204Z" },
{ url = "https://files.pythonhosted.org/packages/7c/43/acf61476a11437bf9733fb2f70599b1ced11ec7ed9ea760fdd9a77d0c619/google_crc32c-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2", size = 34439, upload-time = "2025-12-16T00:35:20.458Z" },
{ url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" },
{ url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" },
{ url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" },
{ url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" },
{ url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" },
{ url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297, upload-time = "2025-12-16T00:23:20.709Z" },
{ url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867, upload-time = "2025-12-16T00:43:14.628Z" },
{ url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344, upload-time = "2025-12-16T00:40:24.742Z" },
{ url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694, upload-time = "2025-12-16T00:40:25.505Z" },
{ url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435, upload-time = "2025-12-16T00:35:22.107Z" },
{ url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301, upload-time = "2025-12-16T00:24:48.527Z" },
{ url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868, upload-time = "2025-12-16T00:48:12.163Z" },
{ url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381, upload-time = "2025-12-16T00:40:26.268Z" },
{ url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734, upload-time = "2025-12-16T00:40:27.028Z" },
{ url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878, upload-time = "2025-12-16T00:35:23.142Z" },
{ url = "https://files.pythonhosted.org/packages/52/c5/c171e4d8c44fec1422d801a6d2e5d7ddabd733eeda505c79730ee9607f07/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93", size = 28615, upload-time = "2025-12-16T00:40:29.298Z" },
{ url = "https://files.pythonhosted.org/packages/9c/97/7d75fe37a7a6ed171a2cf17117177e7aab7e6e0d115858741b41e9dd4254/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c", size = 28800, upload-time = "2025-12-16T00:40:30.322Z" },
]
[[package]]
name = "google-genai"
version = "1.52.0"
@@ -2191,6 +2315,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
]
[[package]]
name = "google-resumable-media"
version = "2.8.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-crc32c" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3f/d1/b1ea14b93b6b78f57fc580125de44e9f593ab88dd2460f1a8a8d18f74754/google_resumable_media-2.8.2.tar.gz", hash = "sha256:f3354a182ebd193ae3f42e3ef95e6c9b10f128320de23ac7637236713b1acd70", size = 2164510, upload-time = "2026-03-30T23:34:25.369Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5e/f8/50bfaf4658431ff9de45c5c3935af7ab01157a4903c603cd0eee6e78e087/google_resumable_media-2.8.2-py3-none-any.whl", hash = "sha256:82b6d8ccd11765268cdd2a2123f417ec806b8eef3000a9a38dfe3033da5fb220", size = 81511, upload-time = "2026-03-30T23:34:09.671Z" },
]
[[package]]
name = "googleapis-common-protos"
version = "1.72.0"
@@ -2203,6 +2339,11 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
]
[package.optional-dependencies]
grpc = [
{ name = "grpcio" },
]
[[package]]
name = "greenlet"
version = "3.2.4"
@@ -2253,6 +2394,85 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
]
[[package]]
name = "grpc-google-iam-v1"
version = "0.14.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "googleapis-common-protos", extra = ["grpc"] },
{ name = "grpcio" },
{ name = "protobuf" },
]
sdist = { url = "https://files.pythonhosted.org/packages/44/4f/d098419ad0bfc06c9ce440575f05aa22d8973b6c276e86ac7890093d3c37/grpc_google_iam_v1-0.14.4.tar.gz", hash = "sha256:392b3796947ed6334e61171d9ab06bf7eb357f554e5fc7556ad7aab6d0e17038", size = 23706, upload-time = "2026-04-01T01:57:49.813Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/89/22/c2dd50c09bf679bd38173656cd4402d2511e563b33bc88f90009cf50613c/grpc_google_iam_v1-0.14.4-py3-none-any.whl", hash = "sha256:412facc320fcbd94034b4df3d557662051d4d8adfa86e0ddb4dca70a3f739964", size = 32675, upload-time = "2026-04-01T01:57:47.69Z" },
]
[[package]]
name = "grpcio"
version = "1.80.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" },
{ url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" },
{ url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" },
{ url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" },
{ url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" },
{ url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" },
{ url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" },
{ url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" },
{ url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" },
{ url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" },
{ url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" },
{ url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" },
{ url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" },
{ url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" },
{ url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" },
{ url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" },
{ url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" },
{ url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" },
{ url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" },
{ url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" },
{ url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" },
{ url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" },
{ url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" },
{ url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" },
{ url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" },
{ url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" },
{ url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" },
{ url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" },
{ url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" },
{ url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" },
{ url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" },
{ url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" },
{ url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" },
{ url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" },
{ url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" },
{ url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" },
{ url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" },
{ url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" },
{ url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" },
{ url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" },
]
[[package]]
name = "grpcio-status"
version = "1.80.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "googleapis-common-protos" },
{ name = "grpcio" },
{ name = "protobuf" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/ed/105f619bdd00cb47a49aa2feea6232ea2bbb04199d52a22cc6a7d603b5cb/grpcio_status-1.80.0.tar.gz", hash = "sha256:df73802a4c89a3ea88aa2aff971e886fccce162bc2e6511408b3d67a144381cd", size = 13901, upload-time = "2026-03-30T08:54:34.784Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/80/58cd2dfc19a07d022abe44bde7c365627f6c7cb6f692ada6c65ca437d09a/grpcio_status-1.80.0-py3-none-any.whl", hash = "sha256:4b56990363af50dbf2c2ebb80f1967185c07d87aa25aa2bea45ddb75fc181dbe", size = 14638, upload-time = "2026-03-30T08:54:01.569Z" },
]
[[package]]
name = "h11"
version = "0.16.0"
@@ -3149,6 +3369,11 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e6/05/3516cc7386b220d388aa0bd833308c677e94eceb82b2756dd95e06f6a13f/litellm-1.81.6-py3-none-any.whl", hash = "sha256:573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a", size = 12224343, upload-time = "2026-02-01T04:02:23.704Z" },
]
[package.optional-dependencies]
google = [
{ name = "google-cloud-aiplatform" },
]
[[package]]
name = "locket"
version = "1.0.0"
@@ -4217,7 +4442,7 @@ dependencies = [
{ name = "fastapi" },
{ name = "google-genai" },
{ name = "kubernetes" },
{ name = "litellm" },
{ name = "litellm", extra = ["google"] },
{ name = "openai" },
{ name = "prometheus-client" },
{ name = "prometheus-fastapi-instrumentator" },
@@ -4391,7 +4616,7 @@ requires-dist = [
{ name = "fastapi", specifier = "==0.133.1" },
{ name = "google-genai", specifier = "==1.52.0" },
{ name = "kubernetes", specifier = ">=31.0.0" },
{ name = "litellm", specifier = "==1.81.6" },
{ name = "litellm", extras = ["google"], specifier = "==1.81.6" },
{ name = "openai", specifier = "==2.14.0" },
{ name = "prometheus-client", specifier = ">=0.21.1" },
{ name = "prometheus-fastapi-instrumentator", specifier = "==7.1.0" },

View File

@@ -82,7 +82,10 @@ ARG NODE_OPTIONS
# SENTRY_AUTH_TOKEN is injected via BuildKit secret mount so it is never written
# to any image layer, build cache, or registry manifest.
# Use NODE_OPTIONS in the build command
RUN --mount=type=secret,id=sentry_auth_token,env=SENTRY_AUTH_TOKEN \
RUN --mount=type=secret,id=sentry_auth_token \
if [ -f /run/secrets/sentry_auth_token ]; then \
export SENTRY_AUTH_TOKEN="$(cat /run/secrets/sentry_auth_token)"; \
fi && \
NODE_OPTIONS="${NODE_OPTIONS}" npx next build
# Step 2. Production image, copy all the files and run next

View File

@@ -15,6 +15,7 @@ type InteractiveStatefulVariant =
| "select-heavy"
| "select-card"
| "select-tinted"
| "select-input"
| "select-filter"
| "sidebar-heavy"
| "sidebar-light";
@@ -35,6 +36,7 @@ interface InteractiveStatefulProps
* - `"select-heavy"` — tinted selected background (for list rows, model pickers)
* - `"select-card"` — like select-heavy but filled state has a visible background (for cards/larger surfaces)
* - `"select-tinted"` — like select-heavy but with a tinted rest background
* - `"select-input"` — rests at neutral-00 (matches input bar), hover/open shows neutral-03 + border-01
* - `"select-filter"` — like select-tinted for empty/filled; selected state uses inverted tint backgrounds and inverted text (for filter buttons)
* - `"sidebar-heavy"` — sidebar navigation items: muted when unselected (text-03/text-02), bold when selected (text-04/text-03)
* - `"sidebar-light"` — sidebar navigation items: uniformly muted across all states (text-02/text-02)

View File

@@ -350,6 +350,41 @@
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Input — Empty
Matches input bar background at rest, tints on hover/open.
--------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"] {
@apply bg-background-neutral-00;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"]:hover:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-interaction="hover"]:not(
[data-disabled]
) {
@apply bg-background-neutral-03;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"]:active:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-interaction="active"]:not(
[data-disabled]
) {
@apply bg-background-neutral-03;
--interactive-foreground: var(--text-05);
--interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-disabled] {
@apply bg-transparent;
--interactive-foreground: var(--text-01);
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Tinted — Filled
--------------------------------------------------------------------------- */

View File

@@ -73,7 +73,10 @@ export const MemoizedAnchor = memo(
: undefined;
if (!associatedDoc && !associatedSubQuestion) {
return <>{children}</>;
// Citation not resolved yet (data still streaming) — hide the
// raw [[N]](url) link entirely. It will render as a chip once
// the citation/document data arrives.
return <></>;
}
let icon: React.ReactNode = null;

View File

@@ -44,6 +44,8 @@ export interface MultiModelPanelProps {
errorStackTrace?: string | null;
/** Additional error details */
errorDetails?: Record<string, any> | null;
/** Whether any model is still streaming — disables preferred selection */
isGenerating?: boolean;
}
/**
@@ -73,19 +75,24 @@ export default function MultiModelPanel({
isRetryable,
errorStackTrace,
errorDetails,
isGenerating,
}: MultiModelPanelProps) {
const ModelIcon = getModelIcon(provider, modelName);
const canSelect = !isHidden && !isPreferred && !isGenerating;
const handlePanelClick = useCallback(() => {
if (!isHidden && !isPreferred) onSelect();
}, [isHidden, isPreferred, onSelect]);
if (canSelect) onSelect();
}, [canSelect, onSelect]);
const header = (
<div
className={cn(
"rounded-12",
isPreferred ? "bg-background-tint-02" : "bg-background-tint-00"
"rounded-12 transition-colors",
isPreferred ? "bg-background-tint-02" : "bg-background-tint-00",
canSelect && "cursor-pointer hover:bg-background-tint-02"
)}
onClick={handlePanelClick}
>
<ContentAction
sizePreset="main-ui"
@@ -140,13 +147,7 @@ export default function MultiModelPanel({
}
return (
<div
className={cn(
"flex flex-col gap-3 min-w-0 rounded-16 transition-colors",
!isPreferred && "cursor-pointer hover:bg-background-tint-02"
)}
onClick={handlePanelClick}
>
<div className="flex flex-col gap-3 min-w-0 rounded-16">
{header}
{errorMessage ? (
<div className="p-4">

View File

@@ -1,6 +1,13 @@
"use client";
import { useState, useCallback, useMemo, useEffect, useRef } from "react";
import {
useState,
useCallback,
useMemo,
useEffect,
useLayoutEffect,
useRef,
} from "react";
import { FullChatState } from "@/app/app/message/messageComponents/interfaces";
import { Message } from "@/app/app/interfaces";
import { LlmManager } from "@/lib/hooks";
@@ -110,11 +117,27 @@ export default function MultiModelResponseView({
// Refs to each panel wrapper for height animation on deselect
const panelElsRef = useRef<Map<number, HTMLDivElement>>(new Map());
// Tracks which non-preferred panels overflow the preferred height cap
// Tracks which non-preferred panels overflow the preferred height cap.
// Measured via useLayoutEffect after maxHeight is applied to the DOM —
// ref callbacks fire before layout and can't reliably detect overflow.
const [overflowingPanels, setOverflowingPanels] = useState<Set<number>>(
new Set()
);
useLayoutEffect(() => {
if (preferredPanelHeight == null || preferredIndex === null) return;
const next = new Set<number>();
panelElsRef.current.forEach((el, idx) => {
if (idx === preferredIndex || hiddenPanels.has(idx)) return;
if (el.scrollHeight > el.clientHeight) next.add(idx);
});
setOverflowingPanels((prev) => {
if (prev.size === next.size && Array.from(prev).every((v) => next.has(v)))
return prev;
return next;
});
}, [preferredPanelHeight, preferredIndex, hiddenPanels, responses]);
const preferredPanelRef = useCallback((el: HTMLDivElement | null) => {
if (preferredRoRef.current) {
preferredRoRef.current.disconnect();
@@ -416,6 +439,7 @@ export default function MultiModelResponseView({
isRetryable: response.isRetryable,
errorStackTrace: response.errorStackTrace,
errorDetails: response.errorDetails,
isGenerating,
}),
[
preferredIndex,
@@ -429,6 +453,7 @@ export default function MultiModelResponseView({
onMessageSelection,
onRegenerate,
parentMessage,
isGenerating,
]
);
@@ -515,17 +540,6 @@ export default function MultiModelResponseView({
panelElsRef.current.delete(r.modelIndex);
}
if (isPref) preferredPanelRef(el);
if (capped && el) {
const doesOverflow = el.scrollHeight > el.clientHeight;
setOverflowingPanels((prev) => {
const had = prev.has(r.modelIndex);
if (doesOverflow === had) return prev;
const next = new Set(prev);
if (doesOverflow) next.add(r.modelIndex);
else next.delete(r.modelIndex);
return next;
});
}
}}
style={{
width: `${selectionEntered ? finalW : startW}px`,
@@ -536,21 +550,19 @@ export default function MultiModelResponseView({
: "none",
maxHeight: capped ? preferredPanelHeight : undefined,
overflow: capped ? "hidden" : undefined,
position: capped ? "relative" : undefined,
...(overflows
? {
maskImage:
"linear-gradient(to bottom, black calc(100% - 6rem), transparent 100%)",
WebkitMaskImage:
"linear-gradient(to bottom, black calc(100% - 6rem), transparent 100%)",
}
: {}),
}}
>
<div className={cn(isNonPref && "opacity-50")}>
<MultiModelPanel {...buildPanelProps(r, isNonPref)} />
</div>
{overflows && (
<div
className="absolute inset-x-0 bottom-0 h-24 pointer-events-none"
style={{
background:
"linear-gradient(to top, var(--background-tint-01) 0%, transparent 100%)",
}}
/>
)}
</div>
);
})}

View File

@@ -136,32 +136,49 @@ const AgentMessage = React.memo(function AgentMessage({
finalAnswerComing
);
// Memoize merged citations separately to avoid creating new object when neither source changed
// Merge streaming citation/document data with chatState props.
// NOTE: citationMap and documentMap from usePacketProcessor are mutated in
// place (same object reference), so we use citations.length / documentMap.size
// as change-detection proxies to bust the memo cache when new data arrives.
const mergedCitations = useMemo(
() => ({
...chatState.citations,
...citationMap,
}),
[chatState.citations, citationMap]
// eslint-disable-next-line react-hooks/exhaustive-deps
[chatState.citations, citationMap, citations.length]
);
// Create a chatState that uses streaming citations for immediate rendering
// This merges the prop citations with streaming citations, preferring streaming ones
// Memoized with granular dependencies to prevent cascading re-renders
// Merge streaming documentMap into chatState.docs so inline citation chips
// can resolve [1] → document even when chatState.docs is empty (multi-model).
const mergedDocs = useMemo(() => {
const propDocs = chatState.docs ?? [];
if (documentMap.size === 0) return propDocs;
const seen = new Set(propDocs.map((d) => d.document_id));
const extras = Array.from(documentMap.values()).filter(
(d) => !seen.has(d.document_id)
);
return extras.length > 0 ? [...propDocs, ...extras] : propDocs;
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [chatState.docs, documentMap, documentMap.size]);
// Create a chatState that uses streaming citations and documents for immediate rendering.
// Memoized with granular dependencies to prevent cascading re-renders.
// Note: chatState object is recreated upstream on every render, so we depend on
// individual fields instead of the whole object for proper memoization
// individual fields instead of the whole object for proper memoization.
const effectiveChatState = useMemo<FullChatState>(
() => ({
...chatState,
citations: mergedCitations,
docs: mergedDocs,
}),
[
chatState.agent,
chatState.docs,
chatState.setPresentingDocument,
chatState.overriddenModel,
chatState.researchType,
mergedCitations,
mergedDocs,
]
);

View File

@@ -59,7 +59,6 @@ function TTSButton({ text, voice, speed }: TTSButtonProps) {
// Surface streaming voice playback errors to the user via toast
useEffect(() => {
if (error) {
console.error("Voice playback error:", error);
toast.error(error);
}
}, [error]);

View File

@@ -81,6 +81,15 @@ export function ScrollableTable({
* Processes content for markdown rendering by handling code blocks and LaTeX
*/
export const processContent = (content: string): string => {
// Strip incomplete citation links at the end of streaming content.
// During typewriter animation, [[N]](url) is revealed character by character.
// ReactMarkdown can't parse an incomplete link and renders it as raw text.
// This regex removes any trailing partial citation pattern so only complete
// links are passed to the markdown parser.
content = content.replace(/\[\[\d+\]\]\([^)]*$/, "");
// Also strip a lone [[ or [[N] or [[N]] at the very end (before the URL part arrives)
content = content.replace(/\[\[(?:\d+\]?\]?)?$/, "");
const codeBlockRegex = /```(\w*)\n[\s\S]*?```|```[\s\S]*?$/g;
const matches = content.match(codeBlockRegex);

View File

@@ -34,7 +34,8 @@ export const PROVIDERS: ProviderConfig[] = [
providerName: LLMProviderName.ANTHROPIC,
recommended: true,
models: [
{ name: "claude-opus-4-6", label: "Claude Opus 4.6", recommended: true },
{ name: "claude-opus-4-7", label: "Claude Opus 4.7", recommended: true },
{ name: "claude-opus-4-6", label: "Claude Opus 4.6" },
{ name: "claude-sonnet-4-6", label: "Claude Sonnet 4.6" },
],
apiKeyPlaceholder: "sk-ant-...",

View File

@@ -5,12 +5,12 @@
export interface BuildLlmSelection {
providerName: string; // e.g., "build-mode-anthropic" (LLMProviderDescriptor.name)
provider: string; // e.g., "anthropic"
modelName: string; // e.g., "claude-opus-4-6"
modelName: string; // e.g., "claude-opus-4-7"
}
// Priority order for smart default LLM selection
const LLM_SELECTION_PRIORITY = [
{ provider: "anthropic", modelName: "claude-opus-4-6" },
{ provider: "anthropic", modelName: "claude-opus-4-7" },
{ provider: "openai", modelName: "gpt-5.2" },
{ provider: "openrouter", modelName: "minimax/minimax-m2.1" },
] as const;
@@ -63,10 +63,11 @@ export function getDefaultLlmSelection(
export const RECOMMENDED_BUILD_MODELS = {
preferred: {
provider: "anthropic",
modelName: "claude-opus-4-6",
displayName: "Claude Opus 4.6",
modelName: "claude-opus-4-7",
displayName: "Claude Opus 4.7",
},
alternatives: [
{ provider: "anthropic", modelName: "claude-opus-4-6" },
{ provider: "anthropic", modelName: "claude-sonnet-4-6" },
{ provider: "openai", modelName: "gpt-5.2" },
{ provider: "openai", modelName: "gpt-5.1-codex" },
@@ -148,7 +149,8 @@ export const BUILD_MODE_PROVIDERS: BuildModeProvider[] = [
providerName: "anthropic",
recommended: true,
models: [
{ name: "claude-opus-4-6", label: "Claude Opus 4.6", recommended: true },
{ name: "claude-opus-4-7", label: "Claude Opus 4.7", recommended: true },
{ name: "claude-opus-4-6", label: "Claude Opus 4.6" },
{ name: "claude-sonnet-4-6", label: "Claude Sonnet 4.6" },
],
apiKeyPlaceholder: "sk-ant-...",

View File

@@ -271,6 +271,22 @@ export default function UserLibraryModal({
/>
</Section>
{/* The exact cap is controlled by the backend env var
MAX_EMBEDDED_IMAGES_PER_FILE (default 500). This copy is
deliberately vague so it doesn't drift if the limit is
tuned per-deployment; the precise number is surfaced in
the rejection error the server returns. */}
<Section
flexDirection="row"
justifyContent="end"
padding={0.5}
height="fit"
>
<Text secondaryBody text03>
PDFs with many embedded images may be rejected.
</Text>
</Section>
{isLoading ? (
<Section padding={2} height="fit">
<Text secondaryBody text03>

View File

@@ -3,7 +3,7 @@
import { ValidSources } from "@/lib/types";
import { SourceIcon } from "./SourceIcon";
import { useState } from "react";
import { OnyxIcon } from "./icons/icons";
import { GithubIcon, OnyxIcon } from "./icons/icons";
export function WebResultIcon({
url,
@@ -23,6 +23,8 @@ export function WebResultIcon({
<>
{hostname.includes("onyx.app") ? (
<OnyxIcon size={size} className="dark:text-[#fff] text-[#000]" />
) : hostname === "github.com" || hostname.endsWith(".github.com") ? (
<GithubIcon size={size} />
) : !error ? (
<img
className="my-0 rounded-full py-0"

View File

@@ -46,6 +46,7 @@ import freshdeskIcon from "@public/Freshdesk.png";
import geminiSVG from "@public/Gemini.svg";
import gitbookDarkIcon from "@public/GitBookDark.png";
import gitbookLightIcon from "@public/GitBookLight.png";
import githubDarkIcon from "@public/GithubDarkMode.png";
import githubLightIcon from "@public/Github.png";
import gongIcon from "@public/Gong.png";
import googleIcon from "@public/Google.png";
@@ -855,7 +856,7 @@ export const GitbookIcon = createLogoIcon(gitbookDarkIcon, {
darkSrc: gitbookLightIcon,
});
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true,
darkSrc: githubDarkIcon,
});
export const GitlabIcon = createLogoIcon(gitlabIcon);
export const GmailIcon = createLogoIcon(gmailIcon);

View File

@@ -12,9 +12,9 @@ interface LLMOption {
value: string;
icon: ReturnType<typeof getModelIcon>;
modelName: string;
providerId: number;
providerName: string;
provider: string;
providerDisplayName: string;
supportsImageInput: boolean;
vendor: string | null;
}
@@ -64,7 +64,7 @@ export default function LLMSelector({
return;
}
const key = `${provider.provider}:${modelConfiguration.name}`;
const key = `${provider.id}:${modelConfiguration.name}`;
if (seenKeys.has(key)) {
return; // Skip exact duplicate
}
@@ -87,10 +87,9 @@ export default function LLMSelector({
),
icon: getModelIcon(provider.provider, modelConfiguration.name),
modelName: modelConfiguration.name,
providerId: provider.id,
providerName: provider.name,
provider: provider.provider,
providerDisplayName:
provider.provider_display_name || provider.provider,
supportsImageInput,
vendor: modelConfiguration.vendor || null,
};
@@ -108,33 +107,34 @@ export default function LLMSelector({
requiresImageGeneration,
]);
// Group options by provider using backend-provided display names
// Group options by configured provider instance so multiple instances of the
// same provider type (e.g., two Anthropic API keys) appear as separate groups
// labeled with their user-given names.
const groupedOptions = useMemo(() => {
const groups = new Map<
string,
number,
{ displayName: string; options: LLMOption[] }
>();
llmOptions.forEach((option) => {
const provider = option.provider.toLowerCase();
if (!groups.has(provider)) {
groups.set(provider, {
displayName: option.providerDisplayName,
if (!groups.has(option.providerId)) {
groups.set(option.providerId, {
displayName: option.providerName,
options: [],
});
}
groups.get(provider)!.options.push(option);
groups.get(option.providerId)!.options.push(option);
});
// Sort groups alphabetically by display name
const sortedProviders = Array.from(groups.keys()).sort((a, b) =>
const sortedProviderIds = Array.from(groups.keys()).sort((a, b) =>
groups.get(a)!.displayName.localeCompare(groups.get(b)!.displayName)
);
return sortedProviders.map((provider) => {
const group = groups.get(provider)!;
return sortedProviderIds.map((providerId) => {
const group = groups.get(providerId)!;
return {
provider,
providerId,
displayName: group.displayName,
options: group.options,
};
@@ -179,7 +179,7 @@ export default function LLMSelector({
)}
{showGrouped
? groupedOptions.map((group) => (
<InputSelect.Group key={group.provider}>
<InputSelect.Group key={group.providerId}>
<InputSelect.Label>{group.displayName}</InputSelect.Label>
{group.options.map((option) => (
<InputSelect.Item

View File

@@ -106,9 +106,23 @@ export default function useMultiModelChat(
[currentLlmModel]
);
const removeModel = useCallback((index: number) => {
setSelectedModels((prev) => prev.filter((_, i) => i !== index));
}, []);
const removeModel = useCallback(
(index: number) => {
const next = selectedModels.filter((_, i) => i !== index);
// When dropping to single-model, switch llmManager to the surviving
// model so it becomes the active model instead of reverting to the
// user's default.
if (next.length === 1 && next[0]) {
llmManager.updateCurrentLlm({
name: next[0].name,
provider: next[0].provider,
modelName: next[0].modelName,
});
}
setSelectedModels(next);
},
[selectedModels, llmManager]
);
const replaceModel = useCallback(
(index: number, model: SelectedModel) => {

View File

@@ -110,6 +110,23 @@ export function useTypewriter(target: string, enabled: boolean): string {
}
}, [target.length, displayedLength]);
// When the user navigates away and back (tab switch, window focus),
// snap to all collected content so they see the full response immediately.
useEffect(() => {
const handleVisibility = () => {
if (document.visibilityState === "visible") {
const targetLen = targetRef.current.length;
if (displayedLengthRef.current < targetLen) {
displayedLengthRef.current = targetLen;
setDisplayedLength(targetLen);
}
}
};
document.addEventListener("visibilitychange", handleVisibility);
return () =>
document.removeEventListener("visibilitychange", handleVisibility);
}, []);
return useMemo(
() => target.slice(0, Math.min(displayedLength, target.length)),
[target, displayedLength]

View File

@@ -173,8 +173,13 @@ function AttachmentItemLayout({
rightChildren,
}: AttachmentItemLayoutProps) {
return (
<Section flexDirection="row" gap={0.25} padding={0.25}>
<div className={cn("h-[2.25rem] aspect-square rounded-08")}>
<Section
flexDirection="row"
justifyContent="start"
gap={0.25}
padding={0.25}
>
<div className={cn("h-[2.25rem] aspect-square rounded-08 flex-shrink-0")}>
<Section>
<div
className="attachment-button__icon-wrapper"
@@ -189,6 +194,7 @@ function AttachmentItemLayout({
justifyContent="between"
alignItems="center"
gap={1.5}
className="min-w-0"
>
<div data-testid="attachment-item-title" className="flex-1 min-w-0">
<Content

View File

@@ -53,18 +53,17 @@ export class HTTPStreamingTTSPlayer {
// Create abort controller for this request
this.abortController = new AbortController();
// Build URL with query params
const params = new URLSearchParams();
params.set("text", text);
if (voice) params.set("voice", voice);
params.set("speed", speed.toString());
const url = `${this.getAPIUrl()}?${params}`;
const url = this.getAPIUrl();
const body = JSON.stringify({
text,
...(voice && { voice }),
speed,
});
// Check if MediaSource is supported
if (!window.MediaSource || !MediaSource.isTypeSupported("audio/mpeg")) {
// Fallback to simple buffered playback
return this.fallbackSpeak(url);
return this.fallbackSpeak(url, body);
}
// Create MediaSource and audio element
@@ -129,15 +128,21 @@ export class HTTPStreamingTTSPlayer {
try {
const response = await fetch(url, {
method: "POST",
headers: { "Content-Type": "application/json" },
body,
signal: this.abortController.signal,
credentials: "include", // Include cookies for authentication
credentials: "include",
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(
`TTS request failed: ${response.status} - ${errorText}`
);
let message = `TTS request failed (${response.status})`;
try {
const errorJson = await response.json();
if (errorJson.detail) message = errorJson.detail;
} catch {
// response wasn't JSON — use status text
}
throw new Error(message);
}
const reader = response.body?.getReader();
@@ -242,16 +247,24 @@ export class HTTPStreamingTTSPlayer {
* Fallback for browsers that don't support MediaSource Extensions.
* Buffers all audio before playing.
*/
private async fallbackSpeak(url: string): Promise<void> {
private async fallbackSpeak(url: string, body: string): Promise<void> {
const response = await fetch(url, {
method: "POST",
headers: { "Content-Type": "application/json" },
body,
signal: this.abortController?.signal,
credentials: "include", // Include cookies for authentication
credentials: "include",
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`TTS request failed: ${response.status} - ${errorText}`);
let message = `TTS request failed (${response.status})`;
try {
const errorJson = await response.json();
if (errorJson.detail) message = errorJson.detail;
} catch {
// response wasn't JSON — use status text
}
throw new Error(message);
}
const audioData = await response.arrayBuffer();

View File

@@ -82,6 +82,7 @@ export interface LineItemProps
selected?: boolean;
icon?: React.FunctionComponent<IconProps>;
strokeIcon?: boolean;
description?: string;
rightChildren?: React.ReactNode;
href?: string;
@@ -154,6 +155,7 @@ export default function LineItem({
skeleton,
emphasized,
icon: Icon,
strokeIcon = true,
description,
children,
rightChildren,
@@ -245,7 +247,12 @@ export default function LineItem({
!!(children && description) && "mt-0.5"
)}
>
<Icon className={cn("h-[1rem] w-[1rem]", iconClassNames[variant])} />
<Icon
className={cn(
"h-[1rem] w-[1rem]",
strokeIcon && iconClassNames[variant]
)}
/>
</div>
)}
<Section alignItems="start" gap={0}>

View File

@@ -103,6 +103,7 @@ export default function SwitchList({
item.leading) as React.FunctionComponent<IconProps>)
: undefined
}
strokeIcon={false}
rightChildren={
<Switch
checked={item.isEnabled}

View File

@@ -172,6 +172,7 @@ export default function ModelListContent({
<LineItem
muted
icon={group.Icon}
strokeIcon={false}
rightChildren={
open ? (
<SvgChevronDown className="h-4 w-4 stroke-text-04 shrink-0" />

View File

@@ -4,7 +4,7 @@ import { useState, useMemo, useRef } from "react";
import Popover from "@/refresh-components/Popover";
import { LlmManager } from "@/lib/hooks";
import { getModelIcon } from "@/lib/llmConfig";
import { Button, SelectButton, OpenButton } from "@opal/components";
import { Button, SelectButton } from "@opal/components";
import { SvgPlusCircle, SvgX } from "@opal/icons";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { LLMOption } from "@/refresh-components/popovers/interfaces";
@@ -109,7 +109,10 @@ export default function ModelSelector({
onRemove(existingIndex);
} else if (!atMax) {
onAdd(model);
setOpen(false);
// Close the popover only when we've reached the max model count
if (selectedModels.length + 1 >= MAX_MODELS) {
setOpen(false);
}
}
};
@@ -163,26 +166,13 @@ export default function ModelSelector({
model.modelName
);
if (!isMultiModel) {
// Stable key — keying on model would unmount the pill
// on change and leave Radix's anchorRef detached,
// flashing the closing popover at (0,0).
return (
<OpenButton
key="single-model-pill"
icon={ProviderIcon}
onClick={(e: React.MouseEvent) =>
handlePillClick(index, e.currentTarget as HTMLElement)
}
>
{model.displayName}
</OpenButton>
);
}
return (
<div
key={modelKey(model.provider, model.modelName)}
key={
isMultiModel
? modelKey(model.provider, model.modelName)
: "single-model-pill"
}
className="flex items-center"
>
{index > 0 && (
@@ -194,23 +184,24 @@ export default function ModelSelector({
)}
<SelectButton
icon={ProviderIcon}
rightIcon={SvgX}
rightIcon={isMultiModel ? SvgX : undefined}
state="empty"
variant="select-tinted"
interaction="hover"
variant="select-input"
size="lg"
onClick={(e: React.MouseEvent) => {
const target = e.target as HTMLElement;
const btn = e.currentTarget as HTMLElement;
const icons = btn.querySelectorAll(
".interactive-foreground-icon"
);
const lastIcon = icons[icons.length - 1];
if (lastIcon && lastIcon.contains(target)) {
onRemove(index);
} else {
handlePillClick(index, btn);
if (isMultiModel) {
const target = e.target as HTMLElement;
const btn = e.currentTarget as HTMLElement;
const icons = btn.querySelectorAll(
".interactive-foreground-icon"
);
const lastIcon = icons[icons.length - 1];
if (lastIcon && lastIcon.contains(target)) {
onRemove(index);
return;
}
}
handlePillClick(index, e.currentTarget as HTMLElement);
}}
>
{model.displayName}
@@ -224,7 +215,7 @@ export default function ModelSelector({
</div>
{!(atMax && replacingIndex === null) && (
<Popover.Content side="top" align="end" width="lg">
<Popover.Content side="top" align="end" width="xl">
<ModelListContent
llmProviders={llmManager.llmProviders}
isLoading={llmManager.isLoadingProviders}

View File

@@ -146,6 +146,7 @@ function SharedGroupResources({
interactive={!dimmed}
muted={dimmed}
icon={getSourceMetadata(p.connector.source).icon}
strokeIcon={false}
rightChildren={
p.groups.length > 0 || dimmed ? <SharedBadge /> : undefined
}

View File

@@ -186,6 +186,7 @@ export default function UserFilters({
<LineItem
key={role}
icon={isSelected ? SvgCheck : roleIcon}
strokeIcon={isSelected || role !== UserRole.SLACK_USER}
selected={isSelected}
emphasized={isSelected}
onClick={() => toggleRole(role)}

View File

@@ -184,20 +184,18 @@ export function FileCard({
}
>
<div className="min-w-0 max-w-[12rem]">
<Interactive.Container border heightVariant="fit">
<div className="[&_.opal-content-md-title-row]:min-w-0 [&_.opal-content-md-title]:break-all">
<AttachmentItemLayout
icon={isProcessing ? SimpleLoader : SvgFileText}
title={file.name}
description={
isProcessing
? file.status === UserFileStatus.UPLOADING
? "Uploading..."
: "Processing..."
: typeLabel
}
/>
</div>
<Interactive.Container border heightVariant="fit" widthVariant="full">
<AttachmentItemLayout
icon={isProcessing ? SimpleLoader : SvgFileText}
title={file.name}
description={
isProcessing
? file.status === UserFileStatus.UPLOADING
? "Uploading..."
: "Processing..."
: typeLabel
}
/>
<Spacer horizontal rem={0.5} />
</Interactive.Container>
</div>

View File

@@ -131,6 +131,7 @@ function KnowledgeSidebar({
<LineItem
key={connectedSource.source}
icon={sourceMetadata.icon}
strokeIcon={false}
onClick={() => onNavigateToSource(connectedSource.source)}
selected={isActive}
emphasized={isActive || isSelected || selectionCount > 0}
@@ -720,6 +721,7 @@ const KnowledgeAddView = memo(function KnowledgeAddView({
<LineItem
key={connectedSource.source}
icon={sourceMetadata.icon}
strokeIcon={false}
onClick={() => onNavigateToSource(connectedSource.source)}
emphasized={isSelected || selectionCount > 0}
aria-label={`knowledge-add-source-${connectedSource.source}`}