mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-20 07:02:41 +00:00
Compare commits
12 Commits
bo/hook_ex
...
bo/hook_fr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5b03f8753a | ||
|
|
e91e275f9d | ||
|
|
710b39074f | ||
|
|
8fe2f67d38 | ||
|
|
f00aaf9fc0 | ||
|
|
5b2426b002 | ||
|
|
ba6ab0245b | ||
|
|
b64ebb57e1 | ||
|
|
2fcfdbabde | ||
|
|
ea1a2749c1 | ||
|
|
73c4e22588 | ||
|
|
fceaac6e13 |
@@ -9,12 +9,12 @@ from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
|
||||
from onyx.configs.app_configs import DISABLE_VECTOR_DB
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
|
||||
from onyx.configs.app_configs import HOOK_ENABLED
|
||||
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
|
||||
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.hooks.utils import HOOKS_AVAILABLE
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
# choosing 15 minutes because it roughly gives us enough time to process many tasks
|
||||
@@ -362,7 +362,7 @@ if not MULTI_TENANT:
|
||||
|
||||
tasks_to_schedule.extend(beat_task_templates)
|
||||
|
||||
if HOOKS_AVAILABLE:
|
||||
if not MULTI_TENANT and HOOK_ENABLED:
|
||||
tasks_to_schedule.append(
|
||||
{
|
||||
"name": "hook-execution-log-cleanup",
|
||||
|
||||
@@ -24,6 +24,7 @@ from onyx.configs.app_configs import MANAGED_VESPA
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
|
||||
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
|
||||
@@ -33,6 +34,7 @@ from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
|
||||
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
|
||||
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
|
||||
from onyx.connectors.file.connector import LocalFileConnector
|
||||
@@ -91,6 +93,17 @@ def _user_file_delete_lock_key(user_file_id: str | UUID) -> str:
|
||||
return f"{OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:
|
||||
"""Key that exists while a delete_single_user_file task is sitting in the queue.
|
||||
|
||||
The beat generator sets this with a TTL equal to CELERY_USER_FILE_DELETE_TASK_EXPIRES
|
||||
before enqueuing and the worker deletes it as its first action. This prevents
|
||||
the beat from adding duplicate tasks for files that already have a live task
|
||||
in flight.
|
||||
"""
|
||||
return f"{OnyxRedisLocks.USER_FILE_DELETE_QUEUED_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
|
||||
redis_celery: Redis = celery_app.broker_connection().channel().client # type: ignore
|
||||
return celery_get_queue_length(
|
||||
@@ -546,7 +559,23 @@ def process_single_user_file(
|
||||
ignore_result=True,
|
||||
)
|
||||
def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
|
||||
"""Scan for user files with DELETING status and enqueue per-file tasks."""
|
||||
"""Scan for user files with DELETING status and enqueue per-file tasks.
|
||||
|
||||
Three mechanisms prevent queue runaway (mirrors check_user_file_processing):
|
||||
|
||||
1. **Queue depth backpressure** – if the broker queue already has more than
|
||||
USER_FILE_DELETE_MAX_QUEUE_DEPTH items we skip this beat cycle entirely.
|
||||
|
||||
2. **Per-file queued guard** – before enqueuing a task we set a short-lived
|
||||
Redis key (TTL = CELERY_USER_FILE_DELETE_TASK_EXPIRES). If that key
|
||||
already exists the file already has a live task in the queue, so we skip
|
||||
it. The worker deletes the key the moment it picks up the task so the
|
||||
next beat cycle can re-enqueue if the file is still DELETING.
|
||||
|
||||
3. **Task expiry** – every enqueued task carries an `expires` value equal to
|
||||
CELERY_USER_FILE_DELETE_TASK_EXPIRES. If a task is still sitting in
|
||||
the queue after that deadline, Celery discards it without touching the DB.
|
||||
"""
|
||||
task_logger.info("check_for_user_file_delete - Starting")
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
lock: RedisLock = redis_client.lock(
|
||||
@@ -555,8 +584,23 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
|
||||
)
|
||||
if not lock.acquire(blocking=False):
|
||||
return None
|
||||
|
||||
enqueued = 0
|
||||
skipped_guard = 0
|
||||
try:
|
||||
# --- Protection 1: queue depth backpressure ---
|
||||
# NOTE: must use the broker's Redis client (not redis_client) because
|
||||
# Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
|
||||
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
|
||||
queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
|
||||
if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
|
||||
task_logger.warning(
|
||||
f"check_for_user_file_delete - Queue depth {queue_len} exceeds "
|
||||
f"{USER_FILE_DELETE_MAX_QUEUE_DEPTH}, skipping enqueue for "
|
||||
f"tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
user_file_ids = (
|
||||
db_session.execute(
|
||||
@@ -568,23 +612,40 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
|
||||
.all()
|
||||
)
|
||||
for user_file_id in user_file_ids:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
|
||||
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_DELETE,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
# --- Protection 2: per-file queued guard ---
|
||||
queued_key = _user_file_delete_queued_key(user_file_id)
|
||||
guard_set = redis_client.set(
|
||||
queued_key,
|
||||
1,
|
||||
ex=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
|
||||
nx=True,
|
||||
)
|
||||
if not guard_set:
|
||||
skipped_guard += 1
|
||||
continue
|
||||
|
||||
# --- Protection 3: task expiry ---
|
||||
try:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
|
||||
kwargs={
|
||||
"user_file_id": str(user_file_id),
|
||||
"tenant_id": tenant_id,
|
||||
},
|
||||
queue=OnyxCeleryQueues.USER_FILE_DELETE,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
expires=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
|
||||
)
|
||||
except Exception:
|
||||
redis_client.delete(queued_key)
|
||||
raise
|
||||
enqueued += 1
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
f"check_for_user_file_delete - Error enqueuing deletes - {e.__class__.__name__}"
|
||||
)
|
||||
return None
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"check_for_user_file_delete - Enqueued {enqueued} tasks for tenant={tenant_id}"
|
||||
f"check_for_user_file_delete - Enqueued {enqueued} tasks, skipped_guard={skipped_guard} for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -602,6 +663,9 @@ def delete_user_file_impl(
|
||||
file_lock: RedisLock | None = None
|
||||
if redis_locking:
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
# Clear the queued guard so the beat can re-enqueue if deletion fails
|
||||
# and the file remains in DELETING status.
|
||||
redis_client.delete(_user_file_delete_queued_key(user_file_id))
|
||||
file_lock = redis_client.lock(
|
||||
_user_file_delete_lock_key(user_file_id),
|
||||
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
|
||||
|
||||
@@ -278,14 +278,17 @@ USING_AWS_MANAGED_OPENSEARCH = (
|
||||
OPENSEARCH_PROFILING_DISABLED = (
|
||||
os.environ.get("OPENSEARCH_PROFILING_DISABLED", "").lower() == "true"
|
||||
)
|
||||
|
||||
# Whether to disable match highlights for OpenSearch. Defaults to True for now
|
||||
# as we investigate query performance.
|
||||
OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED = (
|
||||
os.environ.get("OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED", "true").lower() == "true"
|
||||
)
|
||||
# When enabled, OpenSearch returns detailed score breakdowns for each hit.
|
||||
# Useful for debugging and tuning search relevance. Has ~10-30% performance overhead according to documentation.
|
||||
# Seems for Hybrid Search in practice, the impact is actually more like 1000x slower.
|
||||
OPENSEARCH_EXPLAIN_ENABLED = (
|
||||
os.environ.get("OPENSEARCH_EXPLAIN_ENABLED", "").lower() == "true"
|
||||
)
|
||||
|
||||
# Analyzer used for full-text fields (title, content). Use OpenSearch built-in analyzer
|
||||
# names (e.g. "english", "standard", "german"). Affects stemming and tokenization;
|
||||
# existing indices need reindexing after a change.
|
||||
|
||||
@@ -177,6 +177,14 @@ USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500
|
||||
|
||||
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
|
||||
|
||||
# How long a queued user-file-delete task is valid before workers discard it.
|
||||
# Mirrors the processing task expiry to prevent indefinite queue growth when
|
||||
# files are stuck in DELETING status and the beat keeps re-enqueuing them.
|
||||
CELERY_USER_FILE_DELETE_TASK_EXPIRES = 60 # 1 minute (in seconds)
|
||||
|
||||
# Max queue depth before the delete beat stops enqueuing more delete tasks.
|
||||
USER_FILE_DELETE_MAX_QUEUE_DEPTH = 500
|
||||
|
||||
CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
|
||||
|
||||
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
|
||||
@@ -469,6 +477,9 @@ class OnyxRedisLocks:
|
||||
USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = "da_lock:user_file_project_sync_queued"
|
||||
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
|
||||
USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
|
||||
# Short-lived key set when a delete task is enqueued; cleared when the worker picks it up.
|
||||
# Prevents the beat from re-enqueuing the same file while a delete task is already queued.
|
||||
USER_FILE_DELETE_QUEUED_PREFIX = "da_lock:user_file_delete_queued"
|
||||
|
||||
# Release notes
|
||||
RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"
|
||||
|
||||
@@ -12,6 +12,7 @@ from sqlalchemy.orm import Session
|
||||
from starlette.background import BackgroundTasks
|
||||
|
||||
from onyx.configs.app_configs import DISABLE_VECTOR_DB
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
@@ -144,6 +145,7 @@ def upload_files_to_user_files_with_indexing(
|
||||
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
|
||||
)
|
||||
logger.info(
|
||||
f"Triggered indexing for user_file_id={user_file.id} with task_id={task.id}"
|
||||
|
||||
@@ -7,6 +7,7 @@ from uuid import UUID
|
||||
|
||||
from onyx.configs.app_configs import DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S
|
||||
from onyx.configs.app_configs import OPENSEARCH_EXPLAIN_ENABLED
|
||||
from onyx.configs.app_configs import OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED
|
||||
from onyx.configs.app_configs import OPENSEARCH_PROFILING_DISABLED
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import INDEX_SEPARATOR
|
||||
@@ -364,9 +365,6 @@ class DocumentQuery:
|
||||
attached_document_ids=index_filters.attached_document_ids,
|
||||
hierarchy_node_ids=index_filters.hierarchy_node_ids,
|
||||
)
|
||||
match_highlights_configuration = (
|
||||
DocumentQuery._get_match_highlights_configuration()
|
||||
)
|
||||
|
||||
# See https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
|
||||
hybrid_search_query: dict[str, Any] = {
|
||||
@@ -393,7 +391,6 @@ class DocumentQuery:
|
||||
final_hybrid_search_body: dict[str, Any] = {
|
||||
"query": hybrid_search_query,
|
||||
"size": num_hits,
|
||||
"highlight": match_highlights_configuration,
|
||||
"timeout": f"{DEFAULT_OPENSEARCH_QUERY_TIMEOUT_S}s",
|
||||
# Exclude retrieving the vector fields in order to save on
|
||||
# retrieval cost as we don't need them upstream.
|
||||
@@ -402,6 +399,11 @@ class DocumentQuery:
|
||||
},
|
||||
}
|
||||
|
||||
if not OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED:
|
||||
final_hybrid_search_body["highlight"] = (
|
||||
DocumentQuery._get_match_highlights_configuration()
|
||||
)
|
||||
|
||||
# Explain is for scoring breakdowns.
|
||||
if OPENSEARCH_EXPLAIN_ENABLED:
|
||||
final_hybrid_search_body["explain"] = True
|
||||
|
||||
@@ -88,7 +88,6 @@ class OnyxErrorCode(Enum):
|
||||
SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
|
||||
BAD_GATEWAY = ("BAD_GATEWAY", 502)
|
||||
LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
|
||||
HOOK_EXECUTION_FAILED = ("HOOK_EXECUTION_FAILED", 502)
|
||||
GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)
|
||||
|
||||
def __init__(self, code: str, status_code: int) -> None:
|
||||
|
||||
@@ -88,9 +88,13 @@ def summarize_image_with_error_handling(
|
||||
try:
|
||||
return summarize_image_pipeline(llm, image_data, user_prompt, system_prompt)
|
||||
except UnsupportedImageFormatError:
|
||||
magic_hex = image_data[:8].hex() if image_data else "empty"
|
||||
logger.info(
|
||||
"Skipping image summarization due to unsupported MIME type for %s",
|
||||
"Skipping image summarization due to unsupported MIME type "
|
||||
"for %s (magic_bytes=%s, size=%d bytes)",
|
||||
context_name,
|
||||
magic_hex,
|
||||
len(image_data),
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -134,9 +138,23 @@ def _summarize_image(
|
||||
return summary
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Summarization failed. Messages: {messages}"
|
||||
error_msg = error_msg[:1024]
|
||||
raise ValueError(error_msg) from e
|
||||
# Extract structured details from LiteLLM exceptions when available,
|
||||
# rather than dumping the full messages payload (which contains base64
|
||||
# image data and produces enormous, unreadable error logs).
|
||||
str_e = str(e)
|
||||
if len(str_e) > 512:
|
||||
str_e = str_e[:512] + "... (truncated)"
|
||||
parts = [f"Summarization failed: {type(e).__name__}: {str_e}"]
|
||||
status_code = getattr(e, "status_code", None)
|
||||
llm_provider = getattr(e, "llm_provider", None)
|
||||
model = getattr(e, "model", None)
|
||||
if status_code is not None:
|
||||
parts.append(f"status_code={status_code}")
|
||||
if llm_provider is not None:
|
||||
parts.append(f"llm_provider={llm_provider}")
|
||||
if model is not None:
|
||||
parts.append(f"model={model}")
|
||||
raise ValueError(" | ".join(parts)) from e
|
||||
|
||||
|
||||
def _encode_image_for_llm_prompt(image_data: bytes) -> str:
|
||||
|
||||
@@ -1,330 +0,0 @@
|
||||
"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
|
||||
|
||||
Usage (Celery tasks and FastAPI handlers):
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
|
||||
)
|
||||
|
||||
if isinstance(result, HookSkipped):
|
||||
# no active hook configured — continue with original behavior
|
||||
...
|
||||
elif isinstance(result, HookSoftFailed):
|
||||
# hook failed but fail strategy is SOFT — continue with original behavior
|
||||
...
|
||||
else:
|
||||
# result is the response payload dict from the customer's endpoint
|
||||
...
|
||||
|
||||
is_reachable update policy
|
||||
--------------------------
|
||||
``is_reachable`` on the Hook row is updated selectively — only when the outcome
|
||||
carries meaningful signal about physical reachability:
|
||||
|
||||
NetworkError (DNS, connection refused) → False (cannot reach the server)
|
||||
HTTP 401 / 403 → False (api_key revoked or invalid)
|
||||
TimeoutException → None (server may be slow, skip write)
|
||||
Other HTTP errors (4xx / 5xx) → None (server responded, skip write)
|
||||
Unknown exception → None (no signal, skip write)
|
||||
Non-JSON / non-dict response → None (server responded, skip write)
|
||||
Success (2xx, valid dict) → True (confirmed reachable)
|
||||
|
||||
None means "leave the current value unchanged" — no DB round-trip is made.
|
||||
|
||||
DB session design
|
||||
-----------------
|
||||
The executor uses three sessions:
|
||||
|
||||
1. Caller's session (db_session) — used only for the hook lookup read. All
|
||||
needed fields are extracted from the Hook object before the HTTP call, so
|
||||
the caller's session is not held open during the external HTTP request.
|
||||
|
||||
2. Log session — a separate short-lived session opened after the HTTP call
|
||||
completes to write the HookExecutionLog row on failure. Success runs are
|
||||
not recorded. Committed independently of everything else.
|
||||
|
||||
3. Reachable session — a second short-lived session to update is_reachable on
|
||||
the Hook. Kept separate from the log session so a concurrent hook deletion
|
||||
(which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
|
||||
prevent the execution log from being written. This update is best-effort.
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.db.hook import create_hook_execution_log__no_commit
|
||||
from onyx.db.hook import get_non_deleted_hook_by_hook_point
|
||||
from onyx.db.hook import update_hook__no_commit
|
||||
from onyx.db.models import Hook
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.hooks.utils import HOOKS_AVAILABLE
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class HookSkipped:
|
||||
"""No active hook configured for this hook point."""
|
||||
|
||||
|
||||
class HookSoftFailed:
|
||||
"""Hook was called but failed with SOFT fail strategy — continuing."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _HttpOutcome(BaseModel):
|
||||
"""Structured result of an HTTP hook call, returned by _process_response."""
|
||||
|
||||
is_success: bool
|
||||
updated_is_reachable: (
|
||||
bool | None
|
||||
) # True/False = write to DB, None = unchanged (skip write)
|
||||
status_code: int | None
|
||||
error_message: str | None
|
||||
response_payload: dict[str, Any] | None
|
||||
|
||||
|
||||
def _lookup_hook(
|
||||
db_session: Session,
|
||||
hook_point: HookPoint,
|
||||
) -> Hook | HookSkipped:
|
||||
"""Return the active Hook or HookSkipped if hooks are unavailable/unconfigured.
|
||||
|
||||
No HTTP call is made and no DB writes are performed for any HookSkipped path.
|
||||
There is nothing to log and no reachability information to update.
|
||||
"""
|
||||
if not HOOKS_AVAILABLE:
|
||||
return HookSkipped()
|
||||
hook = get_non_deleted_hook_by_hook_point(
|
||||
db_session=db_session, hook_point=hook_point
|
||||
)
|
||||
if hook is None or not hook.is_active:
|
||||
return HookSkipped()
|
||||
if not hook.endpoint_url:
|
||||
return HookSkipped()
|
||||
return hook
|
||||
|
||||
|
||||
def _process_response(
|
||||
*,
|
||||
response: httpx.Response | None,
|
||||
exc: Exception | None,
|
||||
timeout: float,
|
||||
) -> _HttpOutcome:
|
||||
"""Process the result of an HTTP call and return a structured outcome.
|
||||
|
||||
Called after the client.post() try/except. If post() raised, exc is set and
|
||||
response is None. Otherwise response is set and exc is None. Handles
|
||||
raise_for_status(), JSON decoding, and the dict shape check.
|
||||
"""
|
||||
if exc is not None:
|
||||
if isinstance(exc, httpx.NetworkError):
|
||||
msg = f"Hook network error (endpoint unreachable): {exc}"
|
||||
logger.warning(msg, exc_info=exc)
|
||||
return _HttpOutcome(
|
||||
is_success=False,
|
||||
updated_is_reachable=False,
|
||||
status_code=None,
|
||||
error_message=msg,
|
||||
response_payload=None,
|
||||
)
|
||||
if isinstance(exc, httpx.TimeoutException):
|
||||
msg = f"Hook timed out after {timeout}s: {exc}"
|
||||
logger.warning(msg, exc_info=exc)
|
||||
return _HttpOutcome(
|
||||
is_success=False,
|
||||
updated_is_reachable=None, # timeout doesn't indicate unreachability
|
||||
status_code=None,
|
||||
error_message=msg,
|
||||
response_payload=None,
|
||||
)
|
||||
msg = f"Hook call failed: {exc}"
|
||||
logger.exception(msg, exc_info=exc)
|
||||
return _HttpOutcome(
|
||||
is_success=False,
|
||||
updated_is_reachable=None, # unknown error — don't make assumptions
|
||||
status_code=None,
|
||||
error_message=msg,
|
||||
response_payload=None,
|
||||
)
|
||||
|
||||
if response is None:
|
||||
raise ValueError(
|
||||
"exactly one of response or exc must be non-None; both are None"
|
||||
)
|
||||
status_code = response.status_code
|
||||
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
|
||||
logger.warning(msg, exc_info=e)
|
||||
# 401/403 means the api_key has been revoked or is invalid — mark unreachable
|
||||
# so the operator knows to update it. All other HTTP errors keep is_reachable
|
||||
# as-is (server is up, the request just failed for application reasons).
|
||||
auth_failed = e.response.status_code in (401, 403)
|
||||
return _HttpOutcome(
|
||||
is_success=False,
|
||||
updated_is_reachable=False if auth_failed else None,
|
||||
status_code=status_code,
|
||||
error_message=msg,
|
||||
response_payload=None,
|
||||
)
|
||||
|
||||
try:
|
||||
response_payload = response.json()
|
||||
except (json.JSONDecodeError, httpx.DecodingError) as e:
|
||||
msg = f"Hook returned non-JSON response: {e}"
|
||||
logger.warning(msg, exc_info=e)
|
||||
return _HttpOutcome(
|
||||
is_success=False,
|
||||
updated_is_reachable=None, # server responded — reachability unchanged
|
||||
status_code=status_code,
|
||||
error_message=msg,
|
||||
response_payload=None,
|
||||
)
|
||||
|
||||
if not isinstance(response_payload, dict):
|
||||
msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
|
||||
logger.warning(msg)
|
||||
return _HttpOutcome(
|
||||
is_success=False,
|
||||
updated_is_reachable=None, # server responded — reachability unchanged
|
||||
status_code=status_code,
|
||||
error_message=msg,
|
||||
response_payload=None,
|
||||
)
|
||||
|
||||
return _HttpOutcome(
|
||||
is_success=True,
|
||||
updated_is_reachable=True,
|
||||
status_code=status_code,
|
||||
error_message=None,
|
||||
response_payload=response_payload,
|
||||
)
|
||||
|
||||
|
||||
def _persist_result(
|
||||
*,
|
||||
hook_id: int,
|
||||
outcome: _HttpOutcome,
|
||||
duration_ms: int,
|
||||
) -> None:
|
||||
"""Write the execution log on failure and optionally update is_reachable, each
|
||||
in its own session so a failure in one does not affect the other."""
|
||||
# Only write the execution log on failure — success runs are not recorded.
|
||||
# Must not be skipped if the is_reachable update fails (e.g. hook concurrently
|
||||
# deleted between the initial lookup and here).
|
||||
if not outcome.is_success:
|
||||
try:
|
||||
with get_session_with_current_tenant() as log_session:
|
||||
create_hook_execution_log__no_commit(
|
||||
db_session=log_session,
|
||||
hook_id=hook_id,
|
||||
is_success=False,
|
||||
error_message=outcome.error_message,
|
||||
status_code=outcome.status_code,
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
log_session.commit()
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Failed to persist hook execution log for hook_id={hook_id}"
|
||||
)
|
||||
|
||||
# Update is_reachable separately — best-effort, non-critical.
|
||||
# None means the value is unchanged (set by the caller to skip the no-op write).
|
||||
# update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was
|
||||
# concurrently deleted, so keep this isolated from the log write above.
|
||||
if outcome.updated_is_reachable is not None:
|
||||
try:
|
||||
with get_session_with_current_tenant() as reachable_session:
|
||||
update_hook__no_commit(
|
||||
db_session=reachable_session,
|
||||
hook_id=hook_id,
|
||||
is_reachable=outcome.updated_is_reachable,
|
||||
)
|
||||
reachable_session.commit()
|
||||
except Exception:
|
||||
logger.warning(f"Failed to update is_reachable for hook_id={hook_id}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def execute_hook(
|
||||
*,
|
||||
db_session: Session,
|
||||
hook_point: HookPoint,
|
||||
payload: dict[str, Any],
|
||||
) -> dict[str, Any] | HookSkipped | HookSoftFailed:
|
||||
"""Execute the hook for the given hook point synchronously."""
|
||||
hook = _lookup_hook(db_session, hook_point)
|
||||
if isinstance(hook, HookSkipped):
|
||||
return hook
|
||||
|
||||
timeout = hook.timeout_seconds
|
||||
hook_id = hook.id
|
||||
fail_strategy = hook.fail_strategy
|
||||
endpoint_url = hook.endpoint_url
|
||||
current_is_reachable: bool | None = hook.is_reachable
|
||||
if not endpoint_url:
|
||||
raise ValueError(
|
||||
f"hook_id={hook_id} is active but has no endpoint_url — "
|
||||
"active hooks without an endpoint_url must be rejected by _lookup_hook"
|
||||
)
|
||||
|
||||
start = time.monotonic()
|
||||
response: httpx.Response | None = None
|
||||
exc: Exception | None = None
|
||||
try:
|
||||
api_key: str | None = (
|
||||
hook.api_key.get_value(apply_mask=False) if hook.api_key else None
|
||||
)
|
||||
headers: dict[str, str] = {"Content-Type": "application/json"}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
response = client.post(endpoint_url, json=payload, headers=headers)
|
||||
except Exception as e:
|
||||
exc = e
|
||||
duration_ms = int((time.monotonic() - start) * 1000)
|
||||
|
||||
outcome = _process_response(response=response, exc=exc, timeout=timeout)
|
||||
# Skip the is_reachable write when the value would not change — avoids a
|
||||
# no-op DB round-trip on every call when the hook is already in the expected state.
|
||||
if outcome.updated_is_reachable == current_is_reachable:
|
||||
outcome = outcome.model_copy(update={"updated_is_reachable": None})
|
||||
_persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)
|
||||
|
||||
if not outcome.is_success:
|
||||
if fail_strategy == HookFailStrategy.HARD:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.HOOK_EXECUTION_FAILED,
|
||||
outcome.error_message or "Hook execution failed.",
|
||||
)
|
||||
logger.warning(
|
||||
f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
|
||||
)
|
||||
return HookSoftFailed()
|
||||
if outcome.response_payload is None:
|
||||
raise ValueError(
|
||||
f"response_payload is None for successful hook call (hook_id={hook_id})"
|
||||
)
|
||||
return outcome.response_payload
|
||||
121
backend/onyx/hooks/models.py
Normal file
121
backend/onyx/hooks/models.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Annotated
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from pydantic import field_validator
|
||||
from pydantic import model_validator
|
||||
from pydantic import SecretStr
|
||||
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
|
||||
NonEmptySecretStr = Annotated[SecretStr, Field(min_length=1)]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Request models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HookCreateRequest(BaseModel):
|
||||
name: str = Field(min_length=1)
|
||||
hook_point: HookPoint
|
||||
endpoint_url: str = Field(min_length=1)
|
||||
api_key: NonEmptySecretStr | None = None
|
||||
fail_strategy: HookFailStrategy | None = None # if None, uses HookPointSpec default
|
||||
timeout_seconds: float | None = Field(
|
||||
default=None, gt=0
|
||||
) # if None, uses HookPointSpec default
|
||||
|
||||
@field_validator("name", "endpoint_url")
|
||||
@classmethod
|
||||
def no_whitespace_only(cls, v: str) -> str:
|
||||
if not v.strip():
|
||||
raise ValueError("cannot be whitespace-only.")
|
||||
return v
|
||||
|
||||
|
||||
class HookUpdateRequest(BaseModel):
|
||||
name: str | None = None
|
||||
endpoint_url: str | None = None
|
||||
api_key: NonEmptySecretStr | None = None
|
||||
fail_strategy: HookFailStrategy | None = None
|
||||
timeout_seconds: float | None = Field(default=None, gt=0)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def require_at_least_one_field(self) -> "HookUpdateRequest":
|
||||
if not self.model_fields_set:
|
||||
raise ValueError("At least one field must be provided for an update.")
|
||||
if "name" in self.model_fields_set and not (self.name or "").strip():
|
||||
raise ValueError("name cannot be cleared.")
|
||||
if (
|
||||
"endpoint_url" in self.model_fields_set
|
||||
and not (self.endpoint_url or "").strip()
|
||||
):
|
||||
raise ValueError("endpoint_url cannot be cleared.")
|
||||
if "fail_strategy" in self.model_fields_set and self.fail_strategy is None:
|
||||
raise ValueError(
|
||||
"fail_strategy cannot be null; omit the field to leave it unchanged."
|
||||
)
|
||||
if "timeout_seconds" in self.model_fields_set and self.timeout_seconds is None:
|
||||
raise ValueError(
|
||||
"timeout_seconds cannot be null; omit the field to leave it unchanged."
|
||||
)
|
||||
return self
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Response models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HookPointMetaResponse(BaseModel):
|
||||
hook_point: HookPoint
|
||||
display_name: str
|
||||
description: str
|
||||
docs_url: str | None
|
||||
input_schema: dict[str, Any]
|
||||
output_schema: dict[str, Any]
|
||||
default_timeout_seconds: float
|
||||
default_fail_strategy: HookFailStrategy
|
||||
fail_hard_description: str
|
||||
|
||||
|
||||
class HookResponse(BaseModel):
|
||||
id: int
|
||||
name: str
|
||||
hook_point: HookPoint
|
||||
# Nullable to match the DB column — endpoint_url is required on creation but
|
||||
# future hook point types may not use an external endpoint (e.g. built-in handlers).
|
||||
endpoint_url: str | None
|
||||
fail_strategy: HookFailStrategy
|
||||
timeout_seconds: float # always resolved — None from request is replaced with spec default before DB write
|
||||
is_active: bool
|
||||
is_reachable: bool | None
|
||||
creator_email: str | None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class HookValidateStatus(str, Enum):
|
||||
passed = "passed" # server responded (any status except 401/403)
|
||||
auth_failed = "auth_failed" # server responded with 401 or 403
|
||||
timeout = (
|
||||
"timeout" # TCP connected, but read/write timed out (server exists but slow)
|
||||
)
|
||||
cannot_connect = "cannot_connect" # could not connect to the server
|
||||
|
||||
|
||||
class HookValidateResponse(BaseModel):
|
||||
status: HookValidateStatus
|
||||
error_message: str | None = None
|
||||
|
||||
|
||||
class HookFailureRecord(BaseModel):
|
||||
error_message: str | None = None
|
||||
status_code: int | None = None
|
||||
duration_ms: int | None = None
|
||||
created_at: datetime
|
||||
0
backend/onyx/hooks/points/__init__.py
Normal file
0
backend/onyx/hooks/points/__init__.py
Normal file
67
backend/onyx/hooks/points/base.py
Normal file
67
backend/onyx/hooks/points/base.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
|
||||
|
||||
_REQUIRED_ATTRS = (
|
||||
"hook_point",
|
||||
"display_name",
|
||||
"description",
|
||||
"default_timeout_seconds",
|
||||
"fail_hard_description",
|
||||
"default_fail_strategy",
|
||||
)
|
||||
|
||||
|
||||
class HookPointSpec(ABC):
|
||||
"""Static metadata and contract for a pipeline hook point.
|
||||
|
||||
This is NOT a regular class meant for direct instantiation by callers.
|
||||
Each concrete subclass represents exactly one hook point and is instantiated
|
||||
once at startup, registered in onyx.hooks.registry._REGISTRY. No caller
|
||||
should ever create instances directly — use get_hook_point_spec() or
|
||||
get_all_specs() from the registry instead.
|
||||
|
||||
Each hook point is a concrete subclass of this class. Onyx engineers
|
||||
own these definitions — customers never touch this code.
|
||||
|
||||
Subclasses must define all attributes as class-level constants.
|
||||
"""
|
||||
|
||||
hook_point: HookPoint
|
||||
display_name: str
|
||||
description: str
|
||||
default_timeout_seconds: float
|
||||
fail_hard_description: str
|
||||
default_fail_strategy: HookFailStrategy
|
||||
docs_url: str | None = None
|
||||
|
||||
def __init_subclass__(cls, **kwargs: object) -> None:
|
||||
"""Enforce that every concrete subclass declares all required class attributes.
|
||||
|
||||
Called automatically by Python whenever a class inherits from HookPointSpec.
|
||||
Abstract subclasses (those still carrying unimplemented abstract methods) are
|
||||
skipped — they are intermediate base classes and may not yet define everything.
|
||||
Only fully concrete subclasses are validated, ensuring a clear TypeError at
|
||||
import time rather than a confusing AttributeError at runtime.
|
||||
"""
|
||||
super().__init_subclass__(**kwargs)
|
||||
# Skip intermediate abstract subclasses — they may still be partially defined.
|
||||
if getattr(cls, "__abstractmethods__", None):
|
||||
return
|
||||
missing = [attr for attr in _REQUIRED_ATTRS if not hasattr(cls, attr)]
|
||||
if missing:
|
||||
raise TypeError(f"{cls.__name__} must define class attributes: {missing}")
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def input_schema(self) -> dict[str, Any]:
|
||||
"""JSON schema describing the request payload sent to the customer's endpoint."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def output_schema(self) -> dict[str, Any]:
|
||||
"""JSON schema describing the expected response from the customer's endpoint."""
|
||||
29
backend/onyx/hooks/points/document_ingestion.py
Normal file
29
backend/onyx/hooks/points/document_ingestion.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from typing import Any
|
||||
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks.points.base import HookPointSpec
|
||||
|
||||
|
||||
class DocumentIngestionSpec(HookPointSpec):
|
||||
"""Hook point that runs during document ingestion.
|
||||
|
||||
# TODO(@Bo-Onyx): define call site, input/output schema, and timeout budget.
|
||||
"""
|
||||
|
||||
hook_point = HookPoint.DOCUMENT_INGESTION
|
||||
display_name = "Document Ingestion"
|
||||
description = "Runs during document ingestion. Allows filtering or transforming documents before indexing."
|
||||
default_timeout_seconds = 30.0
|
||||
fail_hard_description = "The document will not be indexed."
|
||||
default_fail_strategy = HookFailStrategy.HARD
|
||||
|
||||
@property
|
||||
def input_schema(self) -> dict[str, Any]:
|
||||
# TODO(@Bo-Onyx): define input schema
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
@property
|
||||
def output_schema(self) -> dict[str, Any]:
|
||||
# TODO(@Bo-Onyx): define output schema
|
||||
return {"type": "object", "properties": {}}
|
||||
83
backend/onyx/hooks/points/query_processing.py
Normal file
83
backend/onyx/hooks/points/query_processing.py
Normal file
@@ -0,0 +1,83 @@
|
||||
from typing import Any
|
||||
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks.points.base import HookPointSpec
|
||||
|
||||
|
||||
class QueryProcessingSpec(HookPointSpec):
|
||||
"""Hook point that runs on every user query before it enters the pipeline.
|
||||
|
||||
Call site: inside handle_stream_message_objects() in
|
||||
backend/onyx/chat/process_message.py, immediately after message_text is
|
||||
assigned from the request and before create_new_chat_message() saves it.
|
||||
|
||||
This is the earliest possible point in the query pipeline:
|
||||
- Raw query — unmodified, exactly as the user typed it
|
||||
- No side effects yet — message has not been saved to DB
|
||||
- User identity is available for user-specific logic
|
||||
|
||||
Supported use cases:
|
||||
- Query rejection: block queries based on content or user context
|
||||
- Query rewriting: normalize, expand, or modify the query
|
||||
- PII removal: scrub sensitive data before the LLM sees it
|
||||
- Access control: reject queries from certain users or groups
|
||||
- Query auditing: log or track queries based on business rules
|
||||
"""
|
||||
|
||||
hook_point = HookPoint.QUERY_PROCESSING
|
||||
display_name = "Query Processing"
|
||||
description = (
|
||||
"Runs on every user query before it enters the pipeline. "
|
||||
"Allows rewriting, filtering, or rejecting queries."
|
||||
)
|
||||
default_timeout_seconds = 5.0 # user is actively waiting — keep tight
|
||||
fail_hard_description = (
|
||||
"The query will be blocked and the user will see an error message."
|
||||
)
|
||||
default_fail_strategy = HookFailStrategy.HARD
|
||||
|
||||
@property
|
||||
def input_schema(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The raw query string exactly as the user typed it.",
|
||||
},
|
||||
"user_email": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Email of the user submitting the query, or null if unauthenticated.",
|
||||
},
|
||||
"chat_session_id": {
|
||||
"type": "string",
|
||||
"description": "UUID of the chat session. Always present — the session is guaranteed to exist by the time this hook fires.",
|
||||
},
|
||||
},
|
||||
"required": ["query", "user_email", "chat_session_id"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
@property
|
||||
def output_schema(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": ["string", "null"],
|
||||
"description": (
|
||||
"The (optionally modified) query to use. "
|
||||
"Set to null to reject the query."
|
||||
),
|
||||
},
|
||||
"rejection_message": {
|
||||
"type": ["string", "null"],
|
||||
"description": (
|
||||
"Message shown to the user when query is null. "
|
||||
"Falls back to a generic message if not provided."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
45
backend/onyx/hooks/registry.py
Normal file
45
backend/onyx/hooks/registry.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks.points.base import HookPointSpec
|
||||
from onyx.hooks.points.document_ingestion import DocumentIngestionSpec
|
||||
from onyx.hooks.points.query_processing import QueryProcessingSpec
|
||||
|
||||
# Internal: use `monkeypatch.setattr(registry_module, "_REGISTRY", {...})` to override in tests.
|
||||
_REGISTRY: dict[HookPoint, HookPointSpec] = {
|
||||
HookPoint.DOCUMENT_INGESTION: DocumentIngestionSpec(),
|
||||
HookPoint.QUERY_PROCESSING: QueryProcessingSpec(),
|
||||
}
|
||||
|
||||
|
||||
def validate_registry() -> None:
|
||||
"""Assert that every HookPoint enum value has a registered spec.
|
||||
|
||||
Call once at application startup (e.g. from the FastAPI lifespan hook).
|
||||
Raises RuntimeError if any hook point is missing a spec.
|
||||
"""
|
||||
missing = set(HookPoint) - set(_REGISTRY)
|
||||
if missing:
|
||||
raise RuntimeError(
|
||||
f"Hook point(s) have no registered spec: {missing}. "
|
||||
"Add an entry to onyx.hooks.registry._REGISTRY."
|
||||
)
|
||||
|
||||
|
||||
def get_hook_point_spec(hook_point: HookPoint) -> HookPointSpec:
|
||||
"""Returns the spec for a given hook point.
|
||||
|
||||
Raises ValueError if the hook point has no registered spec — this is a
|
||||
programmer error; every HookPoint enum value must have a corresponding spec
|
||||
in _REGISTRY.
|
||||
"""
|
||||
try:
|
||||
return _REGISTRY[hook_point]
|
||||
except KeyError:
|
||||
raise ValueError(
|
||||
f"No spec registered for hook point {hook_point!r}. "
|
||||
"Add an entry to onyx.hooks.registry._REGISTRY."
|
||||
)
|
||||
|
||||
|
||||
def get_all_specs() -> list[HookPointSpec]:
|
||||
"""Returns the specs for all registered hook points."""
|
||||
return list(_REGISTRY.values())
|
||||
@@ -1,5 +0,0 @@
|
||||
from onyx.configs.app_configs import HOOK_ENABLED
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
# True only when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
|
||||
HOOKS_AVAILABLE: bool = HOOK_ENABLED and not MULTI_TENANT
|
||||
@@ -395,6 +395,12 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
|
||||
llm = get_default_llm_with_vision()
|
||||
|
||||
if not llm:
|
||||
if get_image_extraction_and_analysis_enabled():
|
||||
logger.warning(
|
||||
"Image analysis is enabled but no vision-capable LLM is "
|
||||
"available — images will not be summarized. Configure a "
|
||||
"vision model in the admin LLM settings."
|
||||
)
|
||||
# Even without LLM, we still convert to IndexingDocument with base Sections
|
||||
return [
|
||||
IndexingDocument(
|
||||
|
||||
@@ -168,10 +168,23 @@ def get_default_llm_with_vision(
|
||||
if model_supports_image_input(
|
||||
default_model.name, default_model.llm_provider.provider
|
||||
):
|
||||
logger.info(
|
||||
"Using default vision model: %s (provider=%s)",
|
||||
default_model.name,
|
||||
default_model.llm_provider.provider,
|
||||
)
|
||||
return create_vision_llm(
|
||||
LLMProviderView.from_model(default_model.llm_provider),
|
||||
default_model.name,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Default vision model %s (provider=%s) does not support "
|
||||
"image input — falling back to searching all providers",
|
||||
default_model.name,
|
||||
default_model.llm_provider.provider,
|
||||
)
|
||||
|
||||
# Fall back to searching all providers
|
||||
models = fetch_existing_models(
|
||||
db_session=db_session,
|
||||
@@ -179,6 +192,10 @@ def get_default_llm_with_vision(
|
||||
)
|
||||
|
||||
if not models:
|
||||
logger.warning(
|
||||
"No LLM models with VISION or CHAT flow type found — "
|
||||
"image summarization will be disabled"
|
||||
)
|
||||
return None
|
||||
|
||||
for model in models:
|
||||
@@ -200,11 +217,25 @@ def get_default_llm_with_vision(
|
||||
|
||||
for model in sorted_models:
|
||||
if model_supports_image_input(model.name, model.llm_provider.provider):
|
||||
logger.info(
|
||||
"Using fallback vision model: %s (provider=%s)",
|
||||
model.name,
|
||||
model.llm_provider.provider,
|
||||
)
|
||||
return create_vision_llm(
|
||||
provider_map[model.llm_provider_id],
|
||||
model.name,
|
||||
)
|
||||
|
||||
checked_models = [
|
||||
f"{m.name} (provider={m.llm_provider.provider})" for m in sorted_models
|
||||
]
|
||||
logger.warning(
|
||||
"No vision-capable model found among %d candidates: %s — "
|
||||
"image summarization will be disabled",
|
||||
len(sorted_models),
|
||||
", ".join(checked_models),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -62,6 +62,7 @@ from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.engine.sql_engine import SqlEngine
|
||||
from onyx.error_handling.exceptions import register_onyx_exception_handlers
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.hooks.registry import validate_registry
|
||||
from onyx.server.api_key.api import router as api_key_router
|
||||
from onyx.server.auth_check import check_router_auth
|
||||
from onyx.server.documents.cc_pair import router as cc_pair_router
|
||||
@@ -76,6 +77,7 @@ from onyx.server.features.default_assistant.api import (
|
||||
)
|
||||
from onyx.server.features.document_set.api import router as document_set_router
|
||||
from onyx.server.features.hierarchy.api import router as hierarchy_router
|
||||
from onyx.server.features.hooks.api import router as hook_router
|
||||
from onyx.server.features.input_prompt.api import (
|
||||
admin_router as admin_input_prompt_router,
|
||||
)
|
||||
@@ -308,6 +310,7 @@ def validate_no_vector_db_settings() -> None:
|
||||
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001
|
||||
validate_no_vector_db_settings()
|
||||
validate_cache_backend_settings()
|
||||
validate_registry()
|
||||
|
||||
# Set recursion limit
|
||||
if SYSTEM_RECURSION_LIMIT is not None:
|
||||
@@ -451,6 +454,7 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
|
||||
|
||||
register_onyx_exception_handlers(application)
|
||||
|
||||
include_router_with_global_prefix_prepended(application, hook_router)
|
||||
include_router_with_global_prefix_prepended(application, password_router)
|
||||
include_router_with_global_prefix_prepended(application, chat_router)
|
||||
include_router_with_global_prefix_prepended(application, query_router)
|
||||
|
||||
0
backend/onyx/server/features/hooks/__init__.py
Normal file
0
backend/onyx/server/features/hooks/__init__.py
Normal file
510
backend/onyx/server/features/hooks/api.py
Normal file
510
backend/onyx/server/features/hooks/api.py
Normal file
@@ -0,0 +1,510 @@
|
||||
import ipaddress
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.auth.users import User
|
||||
from onyx.db.constants import UNSET
|
||||
from onyx.db.constants import UnsetType
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.hook import create_hook__no_commit
|
||||
from onyx.db.hook import delete_hook__no_commit
|
||||
from onyx.db.hook import get_hook_by_id
|
||||
from onyx.db.hook import get_hook_execution_logs
|
||||
from onyx.db.hook import get_hooks
|
||||
from onyx.db.hook import update_hook__no_commit
|
||||
from onyx.db.models import Hook
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.hooks.api_dependencies import require_hook_enabled
|
||||
from onyx.hooks.models import HookCreateRequest
|
||||
from onyx.hooks.models import HookFailureRecord
|
||||
from onyx.hooks.models import HookPointMetaResponse
|
||||
from onyx.hooks.models import HookResponse
|
||||
from onyx.hooks.models import HookUpdateRequest
|
||||
from onyx.hooks.models import HookValidateResponse
|
||||
from onyx.hooks.models import HookValidateStatus
|
||||
from onyx.hooks.registry import get_all_specs
|
||||
from onyx.hooks.registry import get_hook_point_spec
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSRF protection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# RFC 1918 private ranges, loopback, link-local (IMDS at 169.254.169.254),
|
||||
# shared address space, and IPv6 equivalents.
|
||||
_BLOCKED_NETWORKS: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = [
|
||||
ipaddress.ip_network("127.0.0.0/8"), # loopback
|
||||
ipaddress.ip_network("10.0.0.0/8"), # RFC 1918
|
||||
ipaddress.ip_network("172.16.0.0/12"), # RFC 1918
|
||||
ipaddress.ip_network("192.168.0.0/16"), # RFC 1918
|
||||
ipaddress.ip_network("169.254.0.0/16"), # link-local / cloud IMDS
|
||||
ipaddress.ip_network("100.64.0.0/10"), # shared address space (RFC 6598)
|
||||
ipaddress.ip_network("0.0.0.0/8"), # "this" network
|
||||
ipaddress.ip_network("::1/128"), # IPv6 loopback
|
||||
ipaddress.ip_network("fc00::/7"), # IPv6 ULA
|
||||
ipaddress.ip_network("fe80::/10"), # IPv6 link-local
|
||||
]
|
||||
|
||||
|
||||
def _check_ssrf_safety(endpoint_url: str) -> None:
|
||||
"""Raise OnyxError if endpoint_url could be used for SSRF.
|
||||
|
||||
Checks:
|
||||
1. Scheme must be https.
|
||||
2. All IPs the hostname resolves to must be public — private/reserved
|
||||
ranges (RFC 1918, link-local, loopback, cloud IMDS) are blocked.
|
||||
|
||||
Note: this provides a good-faith pre-flight check. DNS rebinding attacks
|
||||
(where the hostname re-resolves to a different IP after this check) are
|
||||
outside the threat model for a single-tenant, admin-only API.
|
||||
"""
|
||||
parsed = urlparse(endpoint_url)
|
||||
|
||||
scheme = (parsed.scheme or "").lower()
|
||||
if scheme != "https":
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT,
|
||||
f"Hook endpoint URL must use https, got: {scheme!r}",
|
||||
)
|
||||
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT,
|
||||
f"Could not parse hostname from URL: {endpoint_url}",
|
||||
)
|
||||
|
||||
try:
|
||||
resolved = socket.getaddrinfo(hostname, None)
|
||||
except socket.gaierror as e:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT,
|
||||
f"Could not resolve hostname {hostname!r}: {e}",
|
||||
)
|
||||
|
||||
for addr_info in resolved:
|
||||
ip_str = addr_info[4][0]
|
||||
try:
|
||||
ip = ipaddress.ip_address(ip_str)
|
||||
except ValueError:
|
||||
continue
|
||||
for blocked in _BLOCKED_NETWORKS:
|
||||
if ip in blocked:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT,
|
||||
f"Hook endpoint URL resolves to a private or reserved IP address "
|
||||
f"({ip}), which is not permitted.",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _hook_to_response(hook: Hook, creator_email: str | None = None) -> HookResponse:
|
||||
return HookResponse(
|
||||
id=hook.id,
|
||||
name=hook.name,
|
||||
hook_point=hook.hook_point,
|
||||
endpoint_url=hook.endpoint_url,
|
||||
fail_strategy=hook.fail_strategy,
|
||||
timeout_seconds=hook.timeout_seconds,
|
||||
is_active=hook.is_active,
|
||||
is_reachable=hook.is_reachable,
|
||||
creator_email=(
|
||||
creator_email
|
||||
if creator_email is not None
|
||||
else (hook.creator.email if hook.creator else None)
|
||||
),
|
||||
created_at=hook.created_at,
|
||||
updated_at=hook.updated_at,
|
||||
)
|
||||
|
||||
|
||||
def _get_hook_or_404(
|
||||
db_session: Session,
|
||||
hook_id: int,
|
||||
include_creator: bool = False,
|
||||
) -> Hook:
|
||||
hook = get_hook_by_id(
|
||||
db_session=db_session,
|
||||
hook_id=hook_id,
|
||||
include_creator=include_creator,
|
||||
)
|
||||
if hook is None:
|
||||
raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook {hook_id} not found.")
|
||||
return hook
|
||||
|
||||
|
||||
def _raise_for_validation_failure(validation: HookValidateResponse) -> None:
|
||||
"""Raise an appropriate OnyxError for a non-passed validation result."""
|
||||
if validation.status == HookValidateStatus.auth_failed:
|
||||
raise OnyxError(OnyxErrorCode.CREDENTIAL_INVALID, validation.error_message)
|
||||
if validation.status == HookValidateStatus.timeout:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.GATEWAY_TIMEOUT,
|
||||
f"Endpoint validation failed: {validation.error_message}",
|
||||
)
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.BAD_GATEWAY,
|
||||
f"Endpoint validation failed: {validation.error_message}",
|
||||
)
|
||||
|
||||
|
||||
def _validate_endpoint(
|
||||
endpoint_url: str,
|
||||
api_key: str | None,
|
||||
timeout_seconds: float,
|
||||
) -> HookValidateResponse:
|
||||
"""Check whether endpoint_url is reachable by sending an empty POST request.
|
||||
|
||||
We use POST since hook endpoints expect POST requests. The server will typically
|
||||
respond with 4xx (missing/invalid body) — that is fine. Any HTTP response means
|
||||
the server is up and routable. A 401/403 response returns auth_failed
|
||||
(not reachable — indicates the api_key is invalid).
|
||||
|
||||
Timeout handling:
|
||||
- ConnectTimeout: TCP handshake never completed → cannot_connect.
|
||||
- ReadTimeout / WriteTimeout: TCP was established, server responded slowly → timeout
|
||||
(operator should consider increasing timeout_seconds).
|
||||
- All other exceptions → cannot_connect.
|
||||
"""
|
||||
_check_ssrf_safety(endpoint_url)
|
||||
headers: dict[str, str] = {}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
try:
|
||||
with httpx.Client(timeout=timeout_seconds) as client:
|
||||
response = client.post(endpoint_url, headers=headers)
|
||||
if response.status_code in (401, 403):
|
||||
return HookValidateResponse(
|
||||
status=HookValidateStatus.auth_failed,
|
||||
error_message=f"Authentication failed (HTTP {response.status_code})",
|
||||
)
|
||||
return HookValidateResponse(status=HookValidateStatus.passed)
|
||||
except httpx.TimeoutException as exc:
|
||||
# ConnectTimeout: TCP handshake never completed → cannot_connect.
|
||||
# ReadTimeout / WriteTimeout: TCP was established, server just responded slowly → timeout.
|
||||
if isinstance(exc, httpx.ConnectTimeout):
|
||||
logger.warning(
|
||||
"Hook endpoint validation: connect timeout for %s",
|
||||
endpoint_url,
|
||||
exc_info=exc,
|
||||
)
|
||||
return HookValidateResponse(
|
||||
status=HookValidateStatus.cannot_connect, error_message=str(exc)
|
||||
)
|
||||
logger.warning(
|
||||
"Hook endpoint validation: read/write timeout for %s",
|
||||
endpoint_url,
|
||||
exc_info=exc,
|
||||
)
|
||||
return HookValidateResponse(
|
||||
status=HookValidateStatus.timeout,
|
||||
error_message="Endpoint timed out — consider increasing timeout_seconds.",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Hook endpoint validation: connection error for %s",
|
||||
endpoint_url,
|
||||
exc_info=exc,
|
||||
)
|
||||
return HookValidateResponse(
|
||||
status=HookValidateStatus.cannot_connect, error_message=str(exc)
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Routers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
router = APIRouter(prefix="/admin/hooks")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hook endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("/specs")
|
||||
def get_hook_point_specs(
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
) -> list[HookPointMetaResponse]:
|
||||
return [
|
||||
HookPointMetaResponse(
|
||||
hook_point=spec.hook_point,
|
||||
display_name=spec.display_name,
|
||||
description=spec.description,
|
||||
docs_url=spec.docs_url,
|
||||
input_schema=spec.input_schema,
|
||||
output_schema=spec.output_schema,
|
||||
default_timeout_seconds=spec.default_timeout_seconds,
|
||||
default_fail_strategy=spec.default_fail_strategy,
|
||||
fail_hard_description=spec.fail_hard_description,
|
||||
)
|
||||
for spec in get_all_specs()
|
||||
]
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_hooks(
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[HookResponse]:
|
||||
hooks = get_hooks(db_session=db_session, include_creator=True)
|
||||
return [_hook_to_response(h) for h in hooks]
|
||||
|
||||
|
||||
@router.post("")
|
||||
def create_hook(
|
||||
req: HookCreateRequest,
|
||||
user: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> HookResponse:
|
||||
"""Create a new hook. The endpoint is validated before persisting — creation fails if
|
||||
the endpoint cannot be reached or the api_key is invalid. Hooks are created inactive;
|
||||
use POST /{hook_id}/activate once ready to receive traffic."""
|
||||
spec = get_hook_point_spec(req.hook_point)
|
||||
api_key = req.api_key.get_secret_value() if req.api_key else None
|
||||
validation = _validate_endpoint(
|
||||
endpoint_url=req.endpoint_url,
|
||||
api_key=api_key,
|
||||
timeout_seconds=req.timeout_seconds or spec.default_timeout_seconds,
|
||||
)
|
||||
if validation.status != HookValidateStatus.passed:
|
||||
_raise_for_validation_failure(validation)
|
||||
|
||||
hook = create_hook__no_commit(
|
||||
db_session=db_session,
|
||||
name=req.name,
|
||||
hook_point=req.hook_point,
|
||||
endpoint_url=req.endpoint_url,
|
||||
api_key=api_key,
|
||||
fail_strategy=req.fail_strategy or spec.default_fail_strategy,
|
||||
timeout_seconds=req.timeout_seconds or spec.default_timeout_seconds,
|
||||
creator_id=user.id,
|
||||
)
|
||||
hook.is_reachable = True
|
||||
db_session.commit()
|
||||
return _hook_to_response(hook, creator_email=user.email)
|
||||
|
||||
|
||||
@router.get("/{hook_id}")
|
||||
def get_hook(
|
||||
hook_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> HookResponse:
|
||||
hook = _get_hook_or_404(db_session, hook_id, include_creator=True)
|
||||
return _hook_to_response(hook)
|
||||
|
||||
|
||||
@router.patch("/{hook_id}")
|
||||
def update_hook(
|
||||
hook_id: int,
|
||||
req: HookUpdateRequest,
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> HookResponse:
|
||||
"""Update hook fields. If endpoint_url, api_key, or timeout_seconds changes, the
|
||||
endpoint is re-validated using the effective values. For active hooks the update is
|
||||
rejected on validation failure, keeping live traffic unaffected. For inactive hooks
|
||||
the update goes through regardless and is_reachable is updated to reflect the result.
|
||||
|
||||
Note: if an active hook's endpoint is currently down, even a timeout_seconds-only
|
||||
increase will be rejected. The recovery flow is: deactivate → update → reactivate.
|
||||
"""
|
||||
# api_key: UNSET = no change, None = clear, value = update
|
||||
api_key: str | None | UnsetType
|
||||
if "api_key" not in req.model_fields_set:
|
||||
api_key = UNSET
|
||||
elif req.api_key is None:
|
||||
api_key = None
|
||||
else:
|
||||
api_key = req.api_key.get_secret_value()
|
||||
|
||||
endpoint_url_changing = "endpoint_url" in req.model_fields_set
|
||||
api_key_changing = not isinstance(api_key, UnsetType)
|
||||
timeout_changing = "timeout_seconds" in req.model_fields_set
|
||||
|
||||
validated_is_reachable: bool | None = None
|
||||
if endpoint_url_changing or api_key_changing or timeout_changing:
|
||||
existing = _get_hook_or_404(db_session, hook_id)
|
||||
effective_url: str = (
|
||||
req.endpoint_url if endpoint_url_changing else existing.endpoint_url # type: ignore[assignment] # endpoint_url is required on create and cannot be cleared on update
|
||||
)
|
||||
effective_api_key: str | None = (
|
||||
(api_key if not isinstance(api_key, UnsetType) else None)
|
||||
if api_key_changing
|
||||
else (
|
||||
existing.api_key.get_value(apply_mask=False)
|
||||
if existing.api_key
|
||||
else None
|
||||
)
|
||||
)
|
||||
effective_timeout: float = (
|
||||
req.timeout_seconds if timeout_changing else existing.timeout_seconds # type: ignore[assignment] # req.timeout_seconds is non-None when timeout_changing (validated by HookUpdateRequest)
|
||||
)
|
||||
validation = _validate_endpoint(
|
||||
endpoint_url=effective_url,
|
||||
api_key=effective_api_key,
|
||||
timeout_seconds=effective_timeout,
|
||||
)
|
||||
if existing.is_active and validation.status != HookValidateStatus.passed:
|
||||
_raise_for_validation_failure(validation)
|
||||
validated_is_reachable = validation.status == HookValidateStatus.passed
|
||||
|
||||
hook = update_hook__no_commit(
|
||||
db_session=db_session,
|
||||
hook_id=hook_id,
|
||||
name=req.name,
|
||||
endpoint_url=(req.endpoint_url if endpoint_url_changing else UNSET),
|
||||
api_key=api_key,
|
||||
fail_strategy=req.fail_strategy,
|
||||
timeout_seconds=req.timeout_seconds,
|
||||
is_reachable=validated_is_reachable,
|
||||
include_creator=True,
|
||||
)
|
||||
db_session.commit()
|
||||
return _hook_to_response(hook)
|
||||
|
||||
|
||||
@router.delete("/{hook_id}")
|
||||
def delete_hook(
|
||||
hook_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
delete_hook__no_commit(db_session=db_session, hook_id=hook_id)
|
||||
db_session.commit()
|
||||
|
||||
|
||||
@router.post("/{hook_id}/activate")
|
||||
def activate_hook(
|
||||
hook_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> HookResponse:
|
||||
hook = _get_hook_or_404(db_session, hook_id)
|
||||
if not hook.endpoint_url:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT, "Hook has no endpoint URL configured."
|
||||
)
|
||||
|
||||
api_key = hook.api_key.get_value(apply_mask=False) if hook.api_key else None
|
||||
validation = _validate_endpoint(
|
||||
endpoint_url=hook.endpoint_url,
|
||||
api_key=api_key,
|
||||
timeout_seconds=hook.timeout_seconds,
|
||||
)
|
||||
if validation.status != HookValidateStatus.passed:
|
||||
# Persist is_reachable=False in a separate session so the request
|
||||
# session has no commits on the failure path and the transaction
|
||||
# boundary stays clean.
|
||||
if hook.is_reachable is not False:
|
||||
with get_session_with_current_tenant() as side_session:
|
||||
update_hook__no_commit(
|
||||
db_session=side_session, hook_id=hook_id, is_reachable=False
|
||||
)
|
||||
side_session.commit()
|
||||
_raise_for_validation_failure(validation)
|
||||
|
||||
hook = update_hook__no_commit(
|
||||
db_session=db_session,
|
||||
hook_id=hook_id,
|
||||
is_active=True,
|
||||
is_reachable=True,
|
||||
include_creator=True,
|
||||
)
|
||||
db_session.commit()
|
||||
return _hook_to_response(hook)
|
||||
|
||||
|
||||
@router.post("/{hook_id}/validate")
|
||||
def validate_hook(
|
||||
hook_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> HookValidateResponse:
|
||||
hook = _get_hook_or_404(db_session, hook_id)
|
||||
if not hook.endpoint_url:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT, "Hook has no endpoint URL configured."
|
||||
)
|
||||
|
||||
api_key = hook.api_key.get_value(apply_mask=False) if hook.api_key else None
|
||||
validation = _validate_endpoint(
|
||||
endpoint_url=hook.endpoint_url,
|
||||
api_key=api_key,
|
||||
timeout_seconds=hook.timeout_seconds,
|
||||
)
|
||||
validation_passed = validation.status == HookValidateStatus.passed
|
||||
if hook.is_reachable != validation_passed:
|
||||
update_hook__no_commit(
|
||||
db_session=db_session, hook_id=hook_id, is_reachable=validation_passed
|
||||
)
|
||||
db_session.commit()
|
||||
return validation
|
||||
|
||||
|
||||
@router.post("/{hook_id}/deactivate")
|
||||
def deactivate_hook(
|
||||
hook_id: int,
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> HookResponse:
|
||||
hook = update_hook__no_commit(
|
||||
db_session=db_session,
|
||||
hook_id=hook_id,
|
||||
is_active=False,
|
||||
include_creator=True,
|
||||
)
|
||||
db_session.commit()
|
||||
return _hook_to_response(hook)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Execution log endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("/{hook_id}/execution-logs")
|
||||
def list_hook_execution_logs(
|
||||
hook_id: int,
|
||||
limit: int = Query(default=50, ge=1, le=100),
|
||||
_: User = Depends(current_admin_user),
|
||||
_hook_enabled: None = Depends(require_hook_enabled),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> list[HookFailureRecord]:
|
||||
_get_hook_or_404(db_session, hook_id)
|
||||
logs = get_hook_execution_logs(db_session=db_session, hook_id=hook_id, limit=limit)
|
||||
return [
|
||||
HookFailureRecord(
|
||||
error_message=log.error_message,
|
||||
status_code=log.status_code,
|
||||
duration_ms=log.duration_ms,
|
||||
created_at=log.created_at,
|
||||
)
|
||||
for log in logs
|
||||
]
|
||||
@@ -0,0 +1,274 @@
|
||||
"""
|
||||
External dependency unit tests for user file delete queue protections.
|
||||
|
||||
Verifies that the three mechanisms added to check_for_user_file_delete work
|
||||
correctly:
|
||||
|
||||
1. Queue depth backpressure – when the broker queue exceeds
|
||||
USER_FILE_DELETE_MAX_QUEUE_DEPTH, no new tasks are enqueued.
|
||||
|
||||
2. Per-file Redis guard key – if the guard key for a file already exists in
|
||||
Redis, that file is skipped even though it is still in DELETING status.
|
||||
|
||||
3. Task expiry – every send_task call carries expires=
|
||||
CELERY_USER_FILE_DELETE_TASK_EXPIRES so that stale queued tasks are
|
||||
discarded by workers automatically.
|
||||
|
||||
Also verifies that delete_user_file_impl clears the guard key the moment
|
||||
it is picked up by a worker.
|
||||
|
||||
Uses real Redis (DB 0 via get_redis_client) and real PostgreSQL for UserFile
|
||||
rows. The Celery app is provided as a MagicMock injected via a PropertyMock
|
||||
on the task class so no real broker is needed.
|
||||
"""
|
||||
|
||||
from collections.abc import Generator
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import PropertyMock
|
||||
from uuid import uuid4
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
_user_file_delete_lock_key,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
_user_file_delete_queued_key,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
check_for_user_file_delete,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
process_single_user_file_delete,
|
||||
)
|
||||
from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
|
||||
from onyx.db.enums import UserFileStatus
|
||||
from onyx.db.models import UserFile
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from tests.external_dependency_unit.conftest import create_test_user
|
||||
from tests.external_dependency_unit.constants import TEST_TENANT_ID
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PATCH_QUEUE_LEN = (
|
||||
"onyx.background.celery.tasks.user_file_processing.tasks.celery_get_queue_length"
|
||||
)
|
||||
|
||||
|
||||
def _create_deleting_user_file(db_session: Session, user_id: object) -> UserFile:
|
||||
"""Insert a UserFile in DELETING status and return it."""
|
||||
uf = UserFile(
|
||||
id=uuid4(),
|
||||
user_id=user_id,
|
||||
file_id=f"test_file_{uuid4().hex[:8]}",
|
||||
name=f"test_{uuid4().hex[:8]}.txt",
|
||||
file_type="text/plain",
|
||||
status=UserFileStatus.DELETING,
|
||||
)
|
||||
db_session.add(uf)
|
||||
db_session.commit()
|
||||
db_session.refresh(uf)
|
||||
return uf
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
|
||||
"""Patch the ``app`` property on *task*'s class so that ``self.app``
|
||||
inside the task function returns *mock_app*.
|
||||
|
||||
With ``bind=True``, ``task.run`` is a bound method whose ``__self__`` is
|
||||
the actual task instance. We patch ``app`` on that instance's class
|
||||
(a unique Celery-generated Task subclass) so the mock is scoped to this
|
||||
task only.
|
||||
"""
|
||||
task_instance = task.run.__self__
|
||||
with patch.object(
|
||||
type(task_instance), "app", new_callable=PropertyMock, return_value=mock_app
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDeleteQueueDepthBackpressure:
|
||||
"""Protection 1: skip all enqueuing when the broker queue is too deep."""
|
||||
|
||||
def test_no_tasks_enqueued_when_queue_over_limit(
|
||||
self,
|
||||
db_session: Session,
|
||||
tenant_context: None, # noqa: ARG002
|
||||
) -> None:
|
||||
"""When the queue depth exceeds the limit the beat cycle is skipped."""
|
||||
user = create_test_user(db_session, "del_bp_user")
|
||||
_create_deleting_user_file(db_session, user.id)
|
||||
|
||||
mock_app = MagicMock()
|
||||
|
||||
with (
|
||||
_patch_task_app(check_for_user_file_delete, mock_app),
|
||||
patch(_PATCH_QUEUE_LEN, return_value=USER_FILE_DELETE_MAX_QUEUE_DEPTH + 1),
|
||||
):
|
||||
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
|
||||
|
||||
mock_app.send_task.assert_not_called()
|
||||
|
||||
|
||||
class TestDeletePerFileGuardKey:
|
||||
"""Protection 2: per-file Redis guard key prevents duplicate enqueue."""
|
||||
|
||||
def test_guarded_file_not_re_enqueued(
|
||||
self,
|
||||
db_session: Session,
|
||||
tenant_context: None, # noqa: ARG002
|
||||
) -> None:
|
||||
"""A file whose guard key is already set in Redis is skipped."""
|
||||
user = create_test_user(db_session, "del_guard_user")
|
||||
uf = _create_deleting_user_file(db_session, user.id)
|
||||
|
||||
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
|
||||
guard_key = _user_file_delete_queued_key(uf.id)
|
||||
redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)
|
||||
|
||||
mock_app = MagicMock()
|
||||
|
||||
try:
|
||||
with (
|
||||
_patch_task_app(check_for_user_file_delete, mock_app),
|
||||
patch(_PATCH_QUEUE_LEN, return_value=0),
|
||||
):
|
||||
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
|
||||
|
||||
# send_task must not have been called with this specific file's ID
|
||||
for call in mock_app.send_task.call_args_list:
|
||||
kwargs = call.kwargs.get("kwargs", {})
|
||||
assert kwargs.get("user_file_id") != str(
|
||||
uf.id
|
||||
), f"File {uf.id} should have been skipped because its guard key exists"
|
||||
finally:
|
||||
redis_client.delete(guard_key)
|
||||
|
||||
def test_guard_key_exists_in_redis_after_enqueue(
|
||||
self,
|
||||
db_session: Session,
|
||||
tenant_context: None, # noqa: ARG002
|
||||
) -> None:
|
||||
"""After a file is enqueued its guard key is present in Redis with a TTL."""
|
||||
user = create_test_user(db_session, "del_guard_set_user")
|
||||
uf = _create_deleting_user_file(db_session, user.id)
|
||||
|
||||
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
|
||||
guard_key = _user_file_delete_queued_key(uf.id)
|
||||
redis_client.delete(guard_key) # clean slate
|
||||
|
||||
mock_app = MagicMock()
|
||||
|
||||
try:
|
||||
with (
|
||||
_patch_task_app(check_for_user_file_delete, mock_app),
|
||||
patch(_PATCH_QUEUE_LEN, return_value=0),
|
||||
):
|
||||
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
|
||||
|
||||
assert redis_client.exists(
|
||||
guard_key
|
||||
), "Guard key should be set in Redis after enqueue"
|
||||
ttl = int(redis_client.ttl(guard_key)) # type: ignore[arg-type]
|
||||
assert (
|
||||
0 < ttl <= CELERY_USER_FILE_DELETE_TASK_EXPIRES
|
||||
), f"Guard key TTL {ttl}s is outside the expected range (0, {CELERY_USER_FILE_DELETE_TASK_EXPIRES}]"
|
||||
finally:
|
||||
redis_client.delete(guard_key)
|
||||
|
||||
|
||||
class TestDeleteTaskExpiry:
|
||||
"""Protection 3: every send_task call includes an expires value."""
|
||||
|
||||
def test_send_task_called_with_expires(
|
||||
self,
|
||||
db_session: Session,
|
||||
tenant_context: None, # noqa: ARG002
|
||||
) -> None:
|
||||
"""send_task is called with the correct queue, task name, and expires."""
|
||||
user = create_test_user(db_session, "del_expires_user")
|
||||
uf = _create_deleting_user_file(db_session, user.id)
|
||||
|
||||
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
|
||||
guard_key = _user_file_delete_queued_key(uf.id)
|
||||
redis_client.delete(guard_key)
|
||||
|
||||
mock_app = MagicMock()
|
||||
|
||||
try:
|
||||
with (
|
||||
_patch_task_app(check_for_user_file_delete, mock_app),
|
||||
patch(_PATCH_QUEUE_LEN, return_value=0),
|
||||
):
|
||||
check_for_user_file_delete.run(tenant_id=TEST_TENANT_ID)
|
||||
|
||||
# At least one task should have been submitted (for our file)
|
||||
assert (
|
||||
mock_app.send_task.call_count >= 1
|
||||
), "Expected at least one task to be submitted"
|
||||
|
||||
# Every submitted task must carry expires
|
||||
for call in mock_app.send_task.call_args_list:
|
||||
assert call.args[0] == OnyxCeleryTask.DELETE_SINGLE_USER_FILE
|
||||
assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_DELETE
|
||||
assert (
|
||||
call.kwargs.get("expires") == CELERY_USER_FILE_DELETE_TASK_EXPIRES
|
||||
), "Task must be submitted with the correct expires value to prevent stale task accumulation"
|
||||
finally:
|
||||
redis_client.delete(guard_key)
|
||||
|
||||
|
||||
class TestDeleteWorkerClearsGuardKey:
|
||||
"""process_single_user_file_delete removes the guard key when it picks up a task."""
|
||||
|
||||
def test_guard_key_deleted_on_pickup(
|
||||
self,
|
||||
tenant_context: None, # noqa: ARG002
|
||||
) -> None:
|
||||
"""The guard key is deleted before the worker does any real work.
|
||||
|
||||
We simulate an already-locked file so delete_user_file_impl returns
|
||||
early – but crucially, after the guard key deletion.
|
||||
"""
|
||||
user_file_id = str(uuid4())
|
||||
|
||||
redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
|
||||
guard_key = _user_file_delete_queued_key(user_file_id)
|
||||
|
||||
# Simulate the guard key set when the beat enqueued the task
|
||||
redis_client.setex(guard_key, CELERY_USER_FILE_DELETE_TASK_EXPIRES, 1)
|
||||
assert redis_client.exists(guard_key), "Guard key must exist before pickup"
|
||||
|
||||
# Hold the per-file delete lock so the worker exits early without
|
||||
# touching the database or file store.
|
||||
lock_key = _user_file_delete_lock_key(user_file_id)
|
||||
delete_lock = redis_client.lock(lock_key, timeout=10)
|
||||
acquired = delete_lock.acquire(blocking=False)
|
||||
assert acquired, "Should be able to acquire the delete lock for this test"
|
||||
|
||||
try:
|
||||
process_single_user_file_delete.run(
|
||||
user_file_id=user_file_id,
|
||||
tenant_id=TEST_TENANT_ID,
|
||||
)
|
||||
finally:
|
||||
if delete_lock.owned():
|
||||
delete_lock.release()
|
||||
|
||||
assert not redis_client.exists(
|
||||
guard_key
|
||||
), "Guard key should be deleted when the worker picks up the task"
|
||||
@@ -97,6 +97,23 @@ def _patch_hybrid_search_normalization_pipeline(
|
||||
)
|
||||
|
||||
|
||||
def _patch_opensearch_match_highlights_disabled(
|
||||
monkeypatch: pytest.MonkeyPatch, disabled: bool
|
||||
) -> None:
|
||||
"""
|
||||
Patches OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED wherever necessary for this
|
||||
test file.
|
||||
"""
|
||||
monkeypatch.setattr(
|
||||
"onyx.configs.app_configs.OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED",
|
||||
disabled,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"onyx.document_index.opensearch.search.OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED",
|
||||
disabled,
|
||||
)
|
||||
|
||||
|
||||
def _create_test_document_chunk(
|
||||
document_id: str,
|
||||
content: str,
|
||||
@@ -805,6 +822,7 @@ class TestOpenSearchClient:
|
||||
"""Tests all hybrid search configurations and pipelines."""
|
||||
# Precondition.
|
||||
_patch_global_tenant_state(monkeypatch, False)
|
||||
_patch_opensearch_match_highlights_disabled(monkeypatch, False)
|
||||
tenant_state = TenantState(tenant_id=POSTGRES_DEFAULT_SCHEMA, multitenant=False)
|
||||
mappings = DocumentSchema.get_document_schema(
|
||||
vector_dimension=128, multitenant=tenant_state.multitenant
|
||||
@@ -947,6 +965,7 @@ class TestOpenSearchClient:
|
||||
"""
|
||||
# Precondition.
|
||||
_patch_global_tenant_state(monkeypatch, True)
|
||||
_patch_opensearch_match_highlights_disabled(monkeypatch, False)
|
||||
tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
|
||||
tenant_y = TenantState(tenant_id="tenant-y", multitenant=True)
|
||||
mappings = DocumentSchema.get_document_schema(
|
||||
@@ -1077,6 +1096,7 @@ class TestOpenSearchClient:
|
||||
"""
|
||||
# Precondition.
|
||||
_patch_global_tenant_state(monkeypatch, True)
|
||||
_patch_opensearch_match_highlights_disabled(monkeypatch, False)
|
||||
tenant_x = TenantState(tenant_id="tenant-x", multitenant=True)
|
||||
mappings = DocumentSchema.get_document_schema(
|
||||
vector_dimension=128, multitenant=tenant_x.multitenant
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
Unit test verifying that the upload API path sends tasks with expires=.
|
||||
|
||||
The upload_files_to_user_files_with_indexing function must include expires=
|
||||
on every send_task call to prevent phantom task accumulation if the worker
|
||||
is down or slow.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.db.models import UserFile
|
||||
from onyx.db.projects import upload_files_to_user_files_with_indexing
|
||||
|
||||
|
||||
def _make_mock_user_file() -> MagicMock:
|
||||
uf = MagicMock(spec=UserFile)
|
||||
uf.id = str(uuid4())
|
||||
return uf
|
||||
|
||||
|
||||
@patch("onyx.db.projects.get_current_tenant_id", return_value="test_tenant")
|
||||
@patch("onyx.db.projects.create_user_files")
|
||||
@patch(
|
||||
"onyx.background.celery.versioned_apps.client.app",
|
||||
new_callable=MagicMock,
|
||||
)
|
||||
def test_send_task_includes_expires(
|
||||
mock_client_app: MagicMock,
|
||||
mock_create: MagicMock,
|
||||
mock_tenant: MagicMock, # noqa: ARG001
|
||||
) -> None:
|
||||
"""Every send_task call from the upload path must include expires=."""
|
||||
user_files = [_make_mock_user_file(), _make_mock_user_file()]
|
||||
mock_create.return_value = MagicMock(
|
||||
user_files=user_files,
|
||||
rejected_files=[],
|
||||
id_to_temp_id={},
|
||||
)
|
||||
|
||||
mock_user = MagicMock()
|
||||
mock_db_session = MagicMock()
|
||||
|
||||
upload_files_to_user_files_with_indexing(
|
||||
files=[],
|
||||
project_id=None,
|
||||
user=mock_user,
|
||||
temp_id_map=None,
|
||||
db_session=mock_db_session,
|
||||
)
|
||||
|
||||
assert mock_client_app.send_task.call_count == len(user_files)
|
||||
|
||||
for call in mock_client_app.send_task.call_args_list:
|
||||
assert call.args[0] == OnyxCeleryTask.PROCESS_SINGLE_USER_FILE
|
||||
assert call.kwargs.get("queue") == OnyxCeleryQueues.USER_FILE_PROCESSING
|
||||
assert (
|
||||
call.kwargs.get("expires") == CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
), "send_task must include expires= to prevent phantom task accumulation"
|
||||
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
Unit tests for image summarization error handling.
|
||||
|
||||
Verifies that:
|
||||
1. LLM errors produce actionable error messages (not base64 dumps)
|
||||
2. Unsupported MIME type logs include the magic bytes and size
|
||||
3. The ValueError raised on LLM failure preserves the original exception
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.file_processing.image_summarization import _summarize_image
|
||||
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
|
||||
|
||||
|
||||
class TestSummarizeImageErrorMessage:
|
||||
"""_summarize_image must not dump base64 image data into error messages."""
|
||||
|
||||
def test_error_message_contains_exception_type_not_base64(self) -> None:
|
||||
"""The ValueError should contain the original exception info, not message payloads."""
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = RuntimeError("Connection timeout")
|
||||
|
||||
# A fake base64-encoded image string (should NOT appear in the error)
|
||||
fake_encoded = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg..."
|
||||
|
||||
with pytest.raises(ValueError, match="RuntimeError: Connection timeout"):
|
||||
_summarize_image(fake_encoded, mock_llm, query="test")
|
||||
|
||||
def test_error_message_does_not_contain_base64(self) -> None:
|
||||
"""Ensure base64 data is never included in the error message."""
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = RuntimeError("API error")
|
||||
|
||||
fake_encoded = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA"
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_summarize_image(fake_encoded, mock_llm)
|
||||
|
||||
error_str = str(exc_info.value)
|
||||
assert "base64" not in error_str
|
||||
assert "iVBOR" not in error_str
|
||||
|
||||
def test_original_exception_is_chained(self) -> None:
|
||||
"""The ValueError should chain the original exception via __cause__."""
|
||||
mock_llm = MagicMock()
|
||||
original = RuntimeError("upstream failure")
|
||||
mock_llm.invoke.side_effect = original
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
assert exc_info.value.__cause__ is original
|
||||
|
||||
|
||||
class TestUnsupportedMimeTypeLogging:
|
||||
"""summarize_image_with_error_handling should log useful info for unsupported formats."""
|
||||
|
||||
@patch(
|
||||
"onyx.file_processing.image_summarization.summarize_image_pipeline",
|
||||
side_effect=__import__(
|
||||
"onyx.file_processing.image_summarization",
|
||||
fromlist=["UnsupportedImageFormatError"],
|
||||
).UnsupportedImageFormatError("unsupported"),
|
||||
)
|
||||
def test_logs_magic_bytes_and_size(
|
||||
self, mock_pipeline: MagicMock # noqa: ARG002
|
||||
) -> None:
|
||||
"""The info log should include magic bytes hex and image size."""
|
||||
mock_llm = MagicMock()
|
||||
# TIFF magic bytes (not in the supported list)
|
||||
image_data = b"\x49\x49\x2a\x00" + b"\x00" * 100
|
||||
|
||||
with patch("onyx.file_processing.image_summarization.logger") as mock_logger:
|
||||
result = summarize_image_with_error_handling(
|
||||
llm=mock_llm,
|
||||
image_data=image_data,
|
||||
context_name="test_image.tiff",
|
||||
)
|
||||
|
||||
assert result is None
|
||||
mock_logger.info.assert_called_once()
|
||||
log_args = mock_logger.info.call_args
|
||||
# Check the format string args contain magic bytes and size
|
||||
assert "49492a00" in str(log_args)
|
||||
assert "104" in str(log_args) # 4 + 100 bytes
|
||||
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Unit tests verifying that LiteLLM error details are extracted and surfaced
|
||||
in image summarization error messages.
|
||||
|
||||
When the LLM call fails, the error handler should include the status_code,
|
||||
llm_provider, and model from LiteLLM exceptions so operators can diagnose
|
||||
the root cause (rate limit, content filter, unsupported vision, etc.)
|
||||
without needing to dig through LiteLLM internals.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.file_processing.image_summarization import _summarize_image
|
||||
|
||||
|
||||
def _make_litellm_style_error(
|
||||
*,
|
||||
message: str = "API error",
|
||||
status_code: int | None = None,
|
||||
llm_provider: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> RuntimeError:
|
||||
"""Create an exception with LiteLLM-style attributes."""
|
||||
exc = RuntimeError(message)
|
||||
if status_code is not None:
|
||||
exc.status_code = status_code # type: ignore[attr-defined]
|
||||
if llm_provider is not None:
|
||||
exc.llm_provider = llm_provider # type: ignore[attr-defined]
|
||||
if model is not None:
|
||||
exc.model = model # type: ignore[attr-defined]
|
||||
return exc
|
||||
|
||||
|
||||
class TestLiteLLMErrorExtraction:
|
||||
"""Verify that LiteLLM error attributes are included in the ValueError."""
|
||||
|
||||
def test_status_code_included(self) -> None:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = _make_litellm_style_error(
|
||||
message="Content filter triggered",
|
||||
status_code=400,
|
||||
llm_provider="azure",
|
||||
model="gpt-4o",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="status_code=400"):
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
def test_llm_provider_included(self) -> None:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = _make_litellm_style_error(
|
||||
message="Bad request",
|
||||
status_code=400,
|
||||
llm_provider="azure",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="llm_provider=azure"):
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
def test_model_included(self) -> None:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = _make_litellm_style_error(
|
||||
message="Bad request",
|
||||
model="gpt-4o",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="model=gpt-4o"):
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
def test_all_fields_in_single_message(self) -> None:
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = _make_litellm_style_error(
|
||||
message="Rate limit exceeded",
|
||||
status_code=429,
|
||||
llm_provider="azure",
|
||||
model="gpt-4o",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
msg = str(exc_info.value)
|
||||
assert "status_code=429" in msg
|
||||
assert "llm_provider=azure" in msg
|
||||
assert "model=gpt-4o" in msg
|
||||
assert "Rate limit exceeded" in msg
|
||||
|
||||
def test_plain_exception_without_litellm_attrs(self) -> None:
|
||||
"""Non-LiteLLM exceptions should still produce a useful message."""
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.invoke.side_effect = ConnectionError("Connection refused")
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
msg = str(exc_info.value)
|
||||
assert "ConnectionError" in msg
|
||||
assert "Connection refused" in msg
|
||||
# Should not contain status_code/llm_provider/model
|
||||
assert "status_code" not in msg
|
||||
assert "llm_provider" not in msg
|
||||
|
||||
def test_no_base64_in_error(self) -> None:
|
||||
"""Error messages must not contain the full base64 image payload.
|
||||
|
||||
Some LiteLLM exceptions echo the request body (including base64 images)
|
||||
in their message. The truncation guard ensures the bulk of such a
|
||||
payload is stripped from the re-raised ValueError.
|
||||
"""
|
||||
mock_llm = MagicMock()
|
||||
# Build a long base64-like payload that exceeds the 512-char truncation
|
||||
fake_b64_payload = "iVBORw0KGgo" * 100 # ~1100 chars
|
||||
fake_b64 = f"data:image/png;base64,{fake_b64_payload}"
|
||||
|
||||
mock_llm.invoke.side_effect = RuntimeError(
|
||||
f"Request failed for payload: {fake_b64}"
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_summarize_image(fake_b64, mock_llm)
|
||||
|
||||
msg = str(exc_info.value)
|
||||
# The full payload must not appear (truncation should have kicked in)
|
||||
assert fake_b64_payload not in msg
|
||||
assert "truncated" in msg
|
||||
|
||||
def test_long_error_message_truncated(self) -> None:
|
||||
"""Exception messages longer than 512 chars are truncated."""
|
||||
mock_llm = MagicMock()
|
||||
long_msg = "x" * 1000
|
||||
mock_llm.invoke.side_effect = RuntimeError(long_msg)
|
||||
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
_summarize_image("data:image/png;base64,abc", mock_llm)
|
||||
|
||||
msg = str(exc_info.value)
|
||||
assert "truncated" in msg
|
||||
# The full 1000-char string should not appear
|
||||
assert long_msg not in msg
|
||||
22
backend/tests/unit/onyx/hooks/test_base_spec.py
Normal file
22
backend/tests/unit/onyx/hooks/test_base_spec.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks.points.base import HookPointSpec
|
||||
|
||||
|
||||
def test_init_subclass_raises_for_missing_attrs() -> None:
|
||||
with pytest.raises(TypeError, match="must define class attributes"):
|
||||
|
||||
class IncompleteSpec(HookPointSpec):
|
||||
hook_point = HookPoint.QUERY_PROCESSING
|
||||
# missing display_name, description, etc.
|
||||
|
||||
@property
|
||||
def input_schema(self) -> dict[str, Any]:
|
||||
return {}
|
||||
|
||||
@property
|
||||
def output_schema(self) -> dict[str, Any]:
|
||||
return {}
|
||||
@@ -1,541 +0,0 @@
|
||||
"""Unit tests for the hook executor."""
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.hooks.executor import execute_hook
|
||||
from onyx.hooks.executor import HookSkipped
|
||||
from onyx.hooks.executor import HookSoftFailed
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PAYLOAD: dict[str, Any] = {"query": "test", "user_email": "u@example.com"}
|
||||
_RESPONSE_PAYLOAD: dict[str, Any] = {"rewritten_query": "better test"}
|
||||
|
||||
|
||||
def _make_hook(
|
||||
*,
|
||||
is_active: bool = True,
|
||||
endpoint_url: str | None = "https://hook.example.com/query",
|
||||
api_key: MagicMock | None = None,
|
||||
timeout_seconds: float = 5.0,
|
||||
fail_strategy: HookFailStrategy = HookFailStrategy.SOFT,
|
||||
hook_id: int = 1,
|
||||
is_reachable: bool | None = None,
|
||||
) -> MagicMock:
|
||||
hook = MagicMock()
|
||||
hook.is_active = is_active
|
||||
hook.endpoint_url = endpoint_url
|
||||
hook.api_key = api_key
|
||||
hook.timeout_seconds = timeout_seconds
|
||||
hook.id = hook_id
|
||||
hook.fail_strategy = fail_strategy
|
||||
hook.is_reachable = is_reachable
|
||||
return hook
|
||||
|
||||
|
||||
def _make_api_key(value: str) -> MagicMock:
|
||||
api_key = MagicMock()
|
||||
api_key.get_value.return_value = value
|
||||
return api_key
|
||||
|
||||
|
||||
def _make_response(
|
||||
*,
|
||||
status_code: int = 200,
|
||||
json_return: Any = _RESPONSE_PAYLOAD,
|
||||
json_side_effect: Exception | None = None,
|
||||
) -> MagicMock:
|
||||
"""Build a response mock with controllable json() behaviour."""
|
||||
response = MagicMock()
|
||||
response.status_code = status_code
|
||||
if json_side_effect is not None:
|
||||
response.json.side_effect = json_side_effect
|
||||
else:
|
||||
response.json.return_value = json_return
|
||||
return response
|
||||
|
||||
|
||||
def _setup_client(
|
||||
mock_client_cls: MagicMock,
|
||||
*,
|
||||
response: MagicMock | None = None,
|
||||
side_effect: Exception | None = None,
|
||||
) -> MagicMock:
|
||||
"""Wire up the httpx.Client mock and return the inner client.
|
||||
|
||||
If side_effect is an httpx.HTTPStatusError, it is raised from
|
||||
raise_for_status() (matching real httpx behaviour) and post() returns a
|
||||
response mock with the matching status_code set. All other exceptions are
|
||||
raised directly from post().
|
||||
"""
|
||||
mock_client = MagicMock()
|
||||
|
||||
if isinstance(side_effect, httpx.HTTPStatusError):
|
||||
error_response = MagicMock()
|
||||
error_response.status_code = side_effect.response.status_code
|
||||
error_response.raise_for_status.side_effect = side_effect
|
||||
mock_client.post = MagicMock(return_value=error_response)
|
||||
else:
|
||||
mock_client.post = MagicMock(
|
||||
side_effect=side_effect, return_value=response if not side_effect else None
|
||||
)
|
||||
|
||||
mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)
|
||||
return mock_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def db_session() -> MagicMock:
|
||||
return MagicMock()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Early-exit guards (no HTTP call, no DB writes)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"hooks_available,hook",
|
||||
[
|
||||
# HOOKS_AVAILABLE=False exits before the DB lookup — hook is irrelevant.
|
||||
pytest.param(False, None, id="hooks_not_available"),
|
||||
pytest.param(True, None, id="hook_not_found"),
|
||||
pytest.param(True, _make_hook(is_active=False), id="hook_inactive"),
|
||||
pytest.param(True, _make_hook(endpoint_url=None), id="no_endpoint_url"),
|
||||
],
|
||||
)
|
||||
def test_early_exit_returns_skipped_with_no_db_writes(
|
||||
db_session: MagicMock,
|
||||
hooks_available: bool,
|
||||
hook: MagicMock | None,
|
||||
) -> None:
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", hooks_available),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
|
||||
):
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
assert isinstance(result, HookSkipped)
|
||||
mock_update.assert_not_called()
|
||||
mock_log.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Successful HTTP call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_success_returns_payload_and_sets_reachable(db_session: MagicMock) -> None:
|
||||
hook = _make_hook()
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(mock_client_cls, response=_make_response())
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
assert result == _RESPONSE_PAYLOAD
|
||||
_, update_kwargs = mock_update.call_args
|
||||
assert update_kwargs["is_reachable"] is True
|
||||
mock_log.assert_not_called()
|
||||
|
||||
|
||||
def test_success_skips_reachable_write_when_already_true(db_session: MagicMock) -> None:
|
||||
"""Deduplication guard: a hook already at is_reachable=True that succeeds
|
||||
must not trigger a DB write."""
|
||||
hook = _make_hook(is_reachable=True)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(mock_client_cls, response=_make_response())
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
assert result == _RESPONSE_PAYLOAD
|
||||
mock_update.assert_not_called()
|
||||
|
||||
|
||||
def test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:
|
||||
"""response.json() returning a non-dict (e.g. list) must be treated as failure.
|
||||
The server responded, so is_reachable is not updated."""
|
||||
hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(
|
||||
mock_client_cls,
|
||||
response=_make_response(json_return=["unexpected", "list"]),
|
||||
)
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
assert isinstance(result, HookSoftFailed)
|
||||
_, log_kwargs = mock_log.call_args
|
||||
assert log_kwargs["is_success"] is False
|
||||
assert "non-dict" in (log_kwargs["error_message"] or "")
|
||||
mock_update.assert_not_called()
|
||||
|
||||
|
||||
def test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:
|
||||
"""response.json() raising must be treated as failure with SOFT strategy.
|
||||
The server responded, so is_reachable is not updated."""
|
||||
hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(
|
||||
mock_client_cls,
|
||||
response=_make_response(
|
||||
json_side_effect=json.JSONDecodeError("not JSON", "", 0)
|
||||
),
|
||||
)
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
assert isinstance(result, HookSoftFailed)
|
||||
_, log_kwargs = mock_log.call_args
|
||||
assert log_kwargs["is_success"] is False
|
||||
assert "non-JSON" in (log_kwargs["error_message"] or "")
|
||||
mock_update.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP failure paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"exception,fail_strategy,expected_type,expected_is_reachable",
|
||||
[
|
||||
# NetworkError → is_reachable=False
|
||||
pytest.param(
|
||||
httpx.ConnectError("refused"),
|
||||
HookFailStrategy.SOFT,
|
||||
HookSoftFailed,
|
||||
False,
|
||||
id="connect_error_soft",
|
||||
),
|
||||
pytest.param(
|
||||
httpx.ConnectError("refused"),
|
||||
HookFailStrategy.HARD,
|
||||
OnyxError,
|
||||
False,
|
||||
id="connect_error_hard",
|
||||
),
|
||||
# 401/403 → is_reachable=False (api_key revoked)
|
||||
pytest.param(
|
||||
httpx.HTTPStatusError(
|
||||
"401",
|
||||
request=MagicMock(),
|
||||
response=MagicMock(status_code=401, text="Unauthorized"),
|
||||
),
|
||||
HookFailStrategy.SOFT,
|
||||
HookSoftFailed,
|
||||
False,
|
||||
id="auth_401_soft",
|
||||
),
|
||||
pytest.param(
|
||||
httpx.HTTPStatusError(
|
||||
"403",
|
||||
request=MagicMock(),
|
||||
response=MagicMock(status_code=403, text="Forbidden"),
|
||||
),
|
||||
HookFailStrategy.HARD,
|
||||
OnyxError,
|
||||
False,
|
||||
id="auth_403_hard",
|
||||
),
|
||||
# TimeoutException → no is_reachable write (None)
|
||||
pytest.param(
|
||||
httpx.TimeoutException("timeout"),
|
||||
HookFailStrategy.SOFT,
|
||||
HookSoftFailed,
|
||||
None,
|
||||
id="timeout_soft",
|
||||
),
|
||||
pytest.param(
|
||||
httpx.TimeoutException("timeout"),
|
||||
HookFailStrategy.HARD,
|
||||
OnyxError,
|
||||
None,
|
||||
id="timeout_hard",
|
||||
),
|
||||
# Other HTTP errors → no is_reachable write (None)
|
||||
pytest.param(
|
||||
httpx.HTTPStatusError(
|
||||
"500",
|
||||
request=MagicMock(),
|
||||
response=MagicMock(status_code=500, text="error"),
|
||||
),
|
||||
HookFailStrategy.SOFT,
|
||||
HookSoftFailed,
|
||||
None,
|
||||
id="http_status_error_soft",
|
||||
),
|
||||
pytest.param(
|
||||
httpx.HTTPStatusError(
|
||||
"500",
|
||||
request=MagicMock(),
|
||||
response=MagicMock(status_code=500, text="error"),
|
||||
),
|
||||
HookFailStrategy.HARD,
|
||||
OnyxError,
|
||||
None,
|
||||
id="http_status_error_hard",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_http_failure_paths(
|
||||
db_session: MagicMock,
|
||||
exception: Exception,
|
||||
fail_strategy: HookFailStrategy,
|
||||
expected_type: type,
|
||||
expected_is_reachable: bool | None,
|
||||
) -> None:
|
||||
hook = _make_hook(fail_strategy=fail_strategy)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(mock_client_cls, side_effect=exception)
|
||||
|
||||
if expected_type is OnyxError:
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
|
||||
else:
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
assert isinstance(result, expected_type)
|
||||
|
||||
if expected_is_reachable is None:
|
||||
mock_update.assert_not_called()
|
||||
else:
|
||||
mock_update.assert_called_once()
|
||||
_, kwargs = mock_update.call_args
|
||||
assert kwargs["is_reachable"] is expected_is_reachable
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Authorization header
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"api_key_value,expect_auth_header",
|
||||
[
|
||||
pytest.param("secret-token", True, id="api_key_present"),
|
||||
pytest.param(None, False, id="api_key_absent"),
|
||||
],
|
||||
)
|
||||
def test_authorization_header(
|
||||
db_session: MagicMock,
|
||||
api_key_value: str | None,
|
||||
expect_auth_header: bool,
|
||||
) -> None:
|
||||
api_key = _make_api_key(api_key_value) if api_key_value else None
|
||||
hook = _make_hook(api_key=api_key)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch("onyx.hooks.executor.update_hook__no_commit"),
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
mock_client = _setup_client(mock_client_cls, response=_make_response())
|
||||
execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
_, call_kwargs = mock_client.post.call_args
|
||||
if expect_auth_header:
|
||||
assert call_kwargs["headers"]["Authorization"] == f"Bearer {api_key_value}"
|
||||
else:
|
||||
assert "Authorization" not in call_kwargs["headers"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persist session failure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"http_exception,expected_result",
|
||||
[
|
||||
pytest.param(None, _RESPONSE_PAYLOAD, id="success_path"),
|
||||
pytest.param(httpx.ConnectError("refused"), OnyxError, id="hard_fail_path"),
|
||||
],
|
||||
)
|
||||
def test_persist_session_failure_is_swallowed(
|
||||
db_session: MagicMock,
|
||||
http_exception: Exception | None,
|
||||
expected_result: Any,
|
||||
) -> None:
|
||||
"""DB session failure in _persist_result must not mask the real return value or OnyxError."""
|
||||
hook = _make_hook(fail_strategy=HookFailStrategy.HARD)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_session_with_current_tenant",
|
||||
side_effect=RuntimeError("DB unavailable"),
|
||||
),
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(
|
||||
mock_client_cls,
|
||||
response=_make_response() if not http_exception else None,
|
||||
side_effect=http_exception,
|
||||
)
|
||||
|
||||
if expected_result is OnyxError:
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
assert exc_info.value.error_code is OnyxErrorCode.HOOK_EXECUTION_FAILED
|
||||
else:
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
assert result == expected_result
|
||||
|
||||
|
||||
def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> None:
|
||||
"""is_reachable update failing (e.g. concurrent hook deletion) must not
|
||||
prevent the execution log from being written.
|
||||
|
||||
Simulates the production failure path: update_hook__no_commit raises
|
||||
OnyxError(NOT_FOUND) as it would if the hook was concurrently deleted
|
||||
between the initial lookup and the reachable update.
|
||||
"""
|
||||
hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
with (
|
||||
patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
|
||||
patch(
|
||||
"onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
|
||||
return_value=hook,
|
||||
),
|
||||
patch("onyx.hooks.executor.get_session_with_current_tenant"),
|
||||
patch(
|
||||
"onyx.hooks.executor.update_hook__no_commit",
|
||||
side_effect=OnyxError(OnyxErrorCode.NOT_FOUND, "hook deleted"),
|
||||
),
|
||||
patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
|
||||
patch("httpx.Client") as mock_client_cls,
|
||||
):
|
||||
_setup_client(mock_client_cls, side_effect=httpx.ConnectError("refused"))
|
||||
result = execute_hook(
|
||||
db_session=db_session,
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
payload=_PAYLOAD,
|
||||
)
|
||||
|
||||
assert isinstance(result, HookSoftFailed)
|
||||
mock_log.assert_called_once()
|
||||
86
backend/tests/unit/onyx/hooks/test_models.py
Normal file
86
backend/tests/unit/onyx/hooks/test_models.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks.models import HookCreateRequest
|
||||
from onyx.hooks.models import HookUpdateRequest
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_empty() -> None:
|
||||
# No fields supplied at all
|
||||
with pytest.raises(ValidationError, match="At least one field must be provided"):
|
||||
HookUpdateRequest()
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_null_name_when_only_field() -> None:
|
||||
# Explicitly setting name=None is rejected as name cannot be cleared
|
||||
with pytest.raises(ValidationError, match="name cannot be cleared"):
|
||||
HookUpdateRequest(name=None)
|
||||
|
||||
|
||||
def test_hook_update_request_accepts_single_field() -> None:
|
||||
req = HookUpdateRequest(name="new name")
|
||||
assert req.name == "new name"
|
||||
|
||||
|
||||
def test_hook_update_request_accepts_partial_fields() -> None:
|
||||
req = HookUpdateRequest(fail_strategy=HookFailStrategy.SOFT, timeout_seconds=10.0)
|
||||
assert req.fail_strategy == HookFailStrategy.SOFT
|
||||
assert req.timeout_seconds == 10.0
|
||||
assert req.name is None
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_null_name() -> None:
|
||||
with pytest.raises(ValidationError, match="name cannot be cleared"):
|
||||
HookUpdateRequest(name=None, fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_empty_name() -> None:
|
||||
with pytest.raises(ValidationError, match="name cannot be cleared"):
|
||||
HookUpdateRequest(name="", fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_null_endpoint_url() -> None:
|
||||
with pytest.raises(ValidationError, match="endpoint_url cannot be cleared"):
|
||||
HookUpdateRequest(endpoint_url=None, fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_empty_endpoint_url() -> None:
|
||||
with pytest.raises(ValidationError, match="endpoint_url cannot be cleared"):
|
||||
HookUpdateRequest(endpoint_url="", fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
|
||||
def test_hook_update_request_allows_null_api_key() -> None:
|
||||
# api_key=null is valid — means "clear the api key"
|
||||
req = HookUpdateRequest(api_key=None)
|
||||
assert req.api_key is None
|
||||
assert "api_key" in req.model_fields_set
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_whitespace_name() -> None:
|
||||
with pytest.raises(ValidationError, match="name cannot be cleared"):
|
||||
HookUpdateRequest(name=" ", fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
|
||||
def test_hook_update_request_rejects_whitespace_endpoint_url() -> None:
|
||||
with pytest.raises(ValidationError, match="endpoint_url cannot be cleared"):
|
||||
HookUpdateRequest(endpoint_url=" ", fail_strategy=HookFailStrategy.SOFT)
|
||||
|
||||
|
||||
def test_hook_create_request_rejects_whitespace_name() -> None:
|
||||
with pytest.raises(ValidationError, match="whitespace-only"):
|
||||
HookCreateRequest(
|
||||
name=" ",
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
endpoint_url="https://example.com/hook",
|
||||
)
|
||||
|
||||
|
||||
def test_hook_create_request_rejects_whitespace_endpoint_url() -> None:
|
||||
with pytest.raises(ValidationError, match="whitespace-only"):
|
||||
HookCreateRequest(
|
||||
name="my hook",
|
||||
hook_point=HookPoint.QUERY_PROCESSING,
|
||||
endpoint_url=" ",
|
||||
)
|
||||
60
backend/tests/unit/onyx/hooks/test_query_processing_spec.py
Normal file
60
backend/tests/unit/onyx/hooks/test_query_processing_spec.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from onyx.db.enums import HookFailStrategy
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks.points.query_processing import QueryProcessingSpec
|
||||
|
||||
|
||||
def test_hook_point_is_query_processing() -> None:
|
||||
assert QueryProcessingSpec().hook_point == HookPoint.QUERY_PROCESSING
|
||||
|
||||
|
||||
def test_default_fail_strategy_is_hard() -> None:
|
||||
assert QueryProcessingSpec().default_fail_strategy == HookFailStrategy.HARD
|
||||
|
||||
|
||||
def test_default_timeout_seconds() -> None:
|
||||
# User is actively waiting — 5s is the documented contract for this hook point
|
||||
assert QueryProcessingSpec().default_timeout_seconds == 5.0
|
||||
|
||||
|
||||
def test_input_schema_required_fields() -> None:
|
||||
schema = QueryProcessingSpec().input_schema
|
||||
assert schema["type"] == "object"
|
||||
required = schema["required"]
|
||||
assert "query" in required
|
||||
assert "user_email" in required
|
||||
assert "chat_session_id" in required
|
||||
|
||||
|
||||
def test_input_schema_chat_session_id_is_string() -> None:
|
||||
props = QueryProcessingSpec().input_schema["properties"]
|
||||
assert props["chat_session_id"]["type"] == "string"
|
||||
|
||||
|
||||
def test_input_schema_query_is_string() -> None:
|
||||
props = QueryProcessingSpec().input_schema["properties"]
|
||||
assert props["query"]["type"] == "string"
|
||||
|
||||
|
||||
def test_input_schema_user_email_is_nullable() -> None:
|
||||
props = QueryProcessingSpec().input_schema["properties"]
|
||||
assert "null" in props["user_email"]["type"]
|
||||
|
||||
|
||||
def test_output_schema_query_is_required() -> None:
|
||||
schema = QueryProcessingSpec().output_schema
|
||||
assert "query" in schema["required"]
|
||||
|
||||
|
||||
def test_output_schema_query_is_nullable() -> None:
|
||||
# null means "reject the query"
|
||||
props = QueryProcessingSpec().output_schema["properties"]
|
||||
assert "null" in props["query"]["type"]
|
||||
|
||||
|
||||
def test_output_schema_rejection_message_is_optional() -> None:
|
||||
schema = QueryProcessingSpec().output_schema
|
||||
assert "rejection_message" not in schema.get("required", [])
|
||||
|
||||
|
||||
def test_input_schema_no_additional_properties() -> None:
|
||||
assert QueryProcessingSpec().input_schema.get("additionalProperties") is False
|
||||
47
backend/tests/unit/onyx/hooks/test_registry.py
Normal file
47
backend/tests/unit/onyx/hooks/test_registry.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import pytest
|
||||
|
||||
from onyx.db.enums import HookPoint
|
||||
from onyx.hooks import registry as registry_module
|
||||
from onyx.hooks.registry import get_all_specs
|
||||
from onyx.hooks.registry import get_hook_point_spec
|
||||
from onyx.hooks.registry import validate_registry
|
||||
|
||||
|
||||
def test_registry_covers_all_hook_points() -> None:
|
||||
"""Every HookPoint enum member must have a registered spec."""
|
||||
assert {s.hook_point for s in get_all_specs()} == set(
|
||||
HookPoint
|
||||
), f"Missing specs for: {set(HookPoint) - {s.hook_point for s in get_all_specs()}}"
|
||||
|
||||
|
||||
def test_get_hook_point_spec_returns_correct_spec() -> None:
|
||||
for hook_point in HookPoint:
|
||||
spec = get_hook_point_spec(hook_point)
|
||||
assert spec.hook_point == hook_point
|
||||
|
||||
|
||||
def test_get_all_specs_returns_all() -> None:
|
||||
specs = get_all_specs()
|
||||
assert len(specs) == len(HookPoint)
|
||||
assert {s.hook_point for s in specs} == set(HookPoint)
|
||||
|
||||
|
||||
def test_get_hook_point_spec_raises_for_unregistered(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""get_hook_point_spec raises ValueError when a hook point has no spec."""
|
||||
monkeypatch.setattr(registry_module, "_REGISTRY", {})
|
||||
with pytest.raises(ValueError, match="No spec registered for hook point"):
|
||||
get_hook_point_spec(HookPoint.QUERY_PROCESSING)
|
||||
|
||||
|
||||
def test_validate_registry_passes() -> None:
|
||||
validate_registry() # should not raise with the real registry
|
||||
|
||||
|
||||
def test_validate_registry_raises_for_incomplete(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
monkeypatch.setattr(registry_module, "_REGISTRY", {})
|
||||
with pytest.raises(RuntimeError, match="Hook point\\(s\\) have no registered spec"):
|
||||
validate_registry()
|
||||
@@ -0,0 +1,130 @@
|
||||
"""
|
||||
Unit tests for vision model selection logging in get_default_llm_with_vision.
|
||||
|
||||
Verifies that operators get clear feedback about:
|
||||
1. Which vision model was selected and why
|
||||
2. When the default vision model doesn't support image input
|
||||
3. When no vision-capable model exists at all
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.llm.factory import get_default_llm_with_vision
|
||||
|
||||
|
||||
_FACTORY = "onyx.llm.factory"
|
||||
|
||||
|
||||
def _make_mock_model(
|
||||
*,
|
||||
name: str = "gpt-4o",
|
||||
provider: str = "openai",
|
||||
provider_id: int = 1,
|
||||
flow_types: list[str] | None = None,
|
||||
) -> MagicMock:
|
||||
model = MagicMock()
|
||||
model.name = name
|
||||
model.llm_provider_id = provider_id
|
||||
model.llm_provider.provider = provider
|
||||
model.llm_model_flow_types = flow_types or []
|
||||
return model
|
||||
|
||||
|
||||
@patch(f"{_FACTORY}.get_session_with_current_tenant")
|
||||
@patch(f"{_FACTORY}.fetch_default_vision_model")
|
||||
@patch(f"{_FACTORY}.model_supports_image_input", return_value=True)
|
||||
@patch(f"{_FACTORY}.llm_from_provider")
|
||||
@patch(f"{_FACTORY}.LLMProviderView")
|
||||
@patch(f"{_FACTORY}.logger")
|
||||
def test_logs_when_using_default_vision_model(
|
||||
mock_logger: MagicMock,
|
||||
mock_provider_view: MagicMock, # noqa: ARG001
|
||||
mock_llm_from: MagicMock, # noqa: ARG001
|
||||
mock_supports: MagicMock, # noqa: ARG001
|
||||
mock_fetch_default: MagicMock,
|
||||
mock_session: MagicMock, # noqa: ARG001
|
||||
) -> None:
|
||||
mock_fetch_default.return_value = _make_mock_model(name="gpt-4o", provider="azure")
|
||||
|
||||
get_default_llm_with_vision()
|
||||
|
||||
mock_logger.info.assert_called_once()
|
||||
log_msg = mock_logger.info.call_args[0][0]
|
||||
assert "default vision model" in log_msg.lower()
|
||||
|
||||
|
||||
@patch(f"{_FACTORY}.get_session_with_current_tenant")
|
||||
@patch(f"{_FACTORY}.fetch_default_vision_model")
|
||||
@patch(f"{_FACTORY}.model_supports_image_input", return_value=False)
|
||||
@patch(f"{_FACTORY}.fetch_existing_models", return_value=[])
|
||||
@patch(f"{_FACTORY}.logger")
|
||||
def test_warns_when_default_model_lacks_vision(
|
||||
mock_logger: MagicMock,
|
||||
mock_fetch_models: MagicMock, # noqa: ARG001
|
||||
mock_supports: MagicMock, # noqa: ARG001
|
||||
mock_fetch_default: MagicMock,
|
||||
mock_session: MagicMock, # noqa: ARG001
|
||||
) -> None:
|
||||
mock_fetch_default.return_value = _make_mock_model(
|
||||
name="text-only-model", provider="azure"
|
||||
)
|
||||
|
||||
result = get_default_llm_with_vision()
|
||||
|
||||
assert result is None
|
||||
# Should have warned about the default model not supporting vision
|
||||
warning_calls = [
|
||||
call
|
||||
for call in mock_logger.warning.call_args_list
|
||||
if "does not support" in str(call)
|
||||
]
|
||||
assert len(warning_calls) >= 1
|
||||
|
||||
|
||||
@patch(f"{_FACTORY}.get_session_with_current_tenant")
|
||||
@patch(f"{_FACTORY}.fetch_default_vision_model", return_value=None)
|
||||
@patch(f"{_FACTORY}.fetch_existing_models", return_value=[])
|
||||
@patch(f"{_FACTORY}.logger")
|
||||
def test_warns_when_no_models_exist(
|
||||
mock_logger: MagicMock,
|
||||
mock_fetch_models: MagicMock, # noqa: ARG001
|
||||
mock_fetch_default: MagicMock, # noqa: ARG001
|
||||
mock_session: MagicMock, # noqa: ARG001
|
||||
) -> None:
|
||||
result = get_default_llm_with_vision()
|
||||
|
||||
assert result is None
|
||||
mock_logger.warning.assert_called_once()
|
||||
log_msg = mock_logger.warning.call_args[0][0]
|
||||
assert "no llm models" in log_msg.lower()
|
||||
|
||||
|
||||
@patch(f"{_FACTORY}.get_session_with_current_tenant")
|
||||
@patch(f"{_FACTORY}.fetch_default_vision_model", return_value=None)
|
||||
@patch(f"{_FACTORY}.fetch_existing_models")
|
||||
@patch(f"{_FACTORY}.model_supports_image_input", return_value=False)
|
||||
@patch(f"{_FACTORY}.LLMProviderView")
|
||||
@patch(f"{_FACTORY}.logger")
|
||||
def test_warns_when_no_model_supports_vision(
|
||||
mock_logger: MagicMock,
|
||||
mock_provider_view: MagicMock, # noqa: ARG001
|
||||
mock_supports: MagicMock, # noqa: ARG001
|
||||
mock_fetch_models: MagicMock,
|
||||
mock_fetch_default: MagicMock, # noqa: ARG001
|
||||
mock_session: MagicMock, # noqa: ARG001
|
||||
) -> None:
|
||||
mock_fetch_models.return_value = [
|
||||
_make_mock_model(name="text-model-1", provider="openai"),
|
||||
_make_mock_model(name="text-model-2", provider="azure", provider_id=2),
|
||||
]
|
||||
|
||||
result = get_default_llm_with_vision()
|
||||
|
||||
assert result is None
|
||||
warning_calls = [
|
||||
call
|
||||
for call in mock_logger.warning.call_args_list
|
||||
if "no vision-capable model" in str(call).lower()
|
||||
]
|
||||
assert len(warning_calls) == 1
|
||||
278
backend/tests/unit/onyx/server/features/hooks/test_api.py
Normal file
278
backend/tests/unit/onyx/server/features/hooks/test_api.py
Normal file
@@ -0,0 +1,278 @@
|
||||
"""Unit tests for onyx.server.features.hooks.api helpers.
|
||||
|
||||
Covers:
|
||||
- _check_ssrf_safety: scheme enforcement and private-IP blocklist
|
||||
- _validate_endpoint: httpx exception → HookValidateStatus mapping
|
||||
ConnectTimeout → cannot_connect (TCP handshake never completed)
|
||||
ConnectError → cannot_connect (DNS / TLS failure)
|
||||
ReadTimeout et al. → timeout (TCP connected, server slow)
|
||||
Any other exc → cannot_connect
|
||||
- _raise_for_validation_failure: HookValidateStatus → OnyxError mapping
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.hooks.models import HookValidateResponse
|
||||
from onyx.hooks.models import HookValidateStatus
|
||||
from onyx.server.features.hooks.api import _check_ssrf_safety
|
||||
from onyx.server.features.hooks.api import _raise_for_validation_failure
|
||||
from onyx.server.features.hooks.api import _validate_endpoint
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_URL = "https://example.com/hook"
|
||||
_API_KEY = "secret"
|
||||
_TIMEOUT = 5.0
|
||||
|
||||
|
||||
def _mock_response(status_code: int) -> MagicMock:
|
||||
response = MagicMock()
|
||||
response.status_code = status_code
|
||||
return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _check_ssrf_safety
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCheckSsrfSafety:
|
||||
def _call(self, url: str) -> None:
|
||||
_check_ssrf_safety(url)
|
||||
|
||||
# --- scheme checks ---
|
||||
|
||||
def test_https_is_allowed(self) -> None:
|
||||
with patch("onyx.server.features.hooks.api.socket.getaddrinfo") as mock_dns:
|
||||
mock_dns.return_value = [(None, None, None, None, ("93.184.216.34", 0))]
|
||||
self._call("https://example.com/hook") # must not raise
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url", ["http://example.com/hook", "ftp://example.com/hook"]
|
||||
)
|
||||
def test_non_https_scheme_rejected(self, url: str) -> None:
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
self._call(url)
|
||||
assert exc_info.value.error_code == OnyxErrorCode.INVALID_INPUT
|
||||
assert "https" in (exc_info.value.detail or "").lower()
|
||||
|
||||
# --- private IP blocklist ---
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ip",
|
||||
[
|
||||
pytest.param("127.0.0.1", id="loopback"),
|
||||
pytest.param("10.0.0.1", id="RFC1918-A"),
|
||||
pytest.param("172.16.0.1", id="RFC1918-B"),
|
||||
pytest.param("192.168.1.1", id="RFC1918-C"),
|
||||
pytest.param("169.254.169.254", id="link-local-IMDS"),
|
||||
pytest.param("100.64.0.1", id="shared-address-space"),
|
||||
pytest.param("::1", id="IPv6-loopback"),
|
||||
pytest.param("fc00::1", id="IPv6-ULA"),
|
||||
pytest.param("fe80::1", id="IPv6-link-local"),
|
||||
],
|
||||
)
|
||||
def test_private_ip_is_blocked(self, ip: str) -> None:
|
||||
with (
|
||||
patch("onyx.server.features.hooks.api.socket.getaddrinfo") as mock_dns,
|
||||
pytest.raises(OnyxError) as exc_info,
|
||||
):
|
||||
mock_dns.return_value = [(None, None, None, None, (ip, 0))]
|
||||
self._call("https://internal.example.com/hook")
|
||||
assert exc_info.value.error_code == OnyxErrorCode.INVALID_INPUT
|
||||
assert ip in (exc_info.value.detail or "")
|
||||
|
||||
def test_public_ip_is_allowed(self) -> None:
|
||||
with patch("onyx.server.features.hooks.api.socket.getaddrinfo") as mock_dns:
|
||||
mock_dns.return_value = [(None, None, None, None, ("93.184.216.34", 0))]
|
||||
self._call("https://example.com/hook") # must not raise
|
||||
|
||||
def test_dns_resolution_failure_raises(self) -> None:
|
||||
import socket
|
||||
|
||||
with (
|
||||
patch(
|
||||
"onyx.server.features.hooks.api.socket.getaddrinfo",
|
||||
side_effect=socket.gaierror("name not found"),
|
||||
),
|
||||
pytest.raises(OnyxError) as exc_info,
|
||||
):
|
||||
self._call("https://no-such-host.example.com/hook")
|
||||
assert exc_info.value.error_code == OnyxErrorCode.INVALID_INPUT
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _validate_endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestValidateEndpoint:
|
||||
def _call(self, *, api_key: str | None = _API_KEY) -> HookValidateResponse:
|
||||
# Bypass SSRF check — tested separately in TestCheckSsrfSafety.
|
||||
with patch("onyx.server.features.hooks.api._check_ssrf_safety"):
|
||||
return _validate_endpoint(
|
||||
endpoint_url=_URL,
|
||||
api_key=api_key,
|
||||
timeout_seconds=_TIMEOUT,
|
||||
)
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_2xx_returns_passed(self, mock_client_cls: MagicMock) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.return_value = (
|
||||
_mock_response(200)
|
||||
)
|
||||
assert self._call().status == HookValidateStatus.passed
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_5xx_returns_passed(self, mock_client_cls: MagicMock) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.return_value = (
|
||||
_mock_response(500)
|
||||
)
|
||||
assert self._call().status == HookValidateStatus.passed
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
@pytest.mark.parametrize("status_code", [401, 403])
|
||||
def test_401_403_returns_auth_failed(
|
||||
self, mock_client_cls: MagicMock, status_code: int
|
||||
) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.return_value = (
|
||||
_mock_response(status_code)
|
||||
)
|
||||
result = self._call()
|
||||
assert result.status == HookValidateStatus.auth_failed
|
||||
assert str(status_code) in (result.error_message or "")
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_4xx_non_auth_returns_passed(self, mock_client_cls: MagicMock) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.return_value = (
|
||||
_mock_response(422)
|
||||
)
|
||||
assert self._call().status == HookValidateStatus.passed
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_connect_timeout_returns_cannot_connect(
|
||||
self, mock_client_cls: MagicMock
|
||||
) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
|
||||
httpx.ConnectTimeout("timed out")
|
||||
)
|
||||
assert self._call().status == HookValidateStatus.cannot_connect
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
@pytest.mark.parametrize(
|
||||
"exc",
|
||||
[
|
||||
httpx.ReadTimeout("read timeout"),
|
||||
httpx.WriteTimeout("write timeout"),
|
||||
httpx.PoolTimeout("pool timeout"),
|
||||
],
|
||||
)
|
||||
def test_read_write_pool_timeout_returns_timeout(
|
||||
self, mock_client_cls: MagicMock, exc: httpx.TimeoutException
|
||||
) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.side_effect = exc
|
||||
assert self._call().status == HookValidateStatus.timeout
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_connect_error_returns_cannot_connect(
|
||||
self, mock_client_cls: MagicMock
|
||||
) -> None:
|
||||
# Covers DNS failures, TLS errors, and other connection-level errors.
|
||||
mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
|
||||
httpx.ConnectError("name resolution failed")
|
||||
)
|
||||
assert self._call().status == HookValidateStatus.cannot_connect
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_arbitrary_exception_returns_cannot_connect(
|
||||
self, mock_client_cls: MagicMock
|
||||
) -> None:
|
||||
mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
|
||||
ConnectionRefusedError("refused")
|
||||
)
|
||||
assert self._call().status == HookValidateStatus.cannot_connect
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_api_key_sent_as_bearer(self, mock_client_cls: MagicMock) -> None:
|
||||
mock_post = mock_client_cls.return_value.__enter__.return_value.post
|
||||
mock_post.return_value = _mock_response(200)
|
||||
self._call(api_key="mykey")
|
||||
_, kwargs = mock_post.call_args
|
||||
assert kwargs["headers"]["Authorization"] == "Bearer mykey"
|
||||
|
||||
@patch("onyx.server.features.hooks.api.httpx.Client")
|
||||
def test_no_api_key_omits_auth_header(self, mock_client_cls: MagicMock) -> None:
|
||||
mock_post = mock_client_cls.return_value.__enter__.return_value.post
|
||||
mock_post.return_value = _mock_response(200)
|
||||
self._call(api_key=None)
|
||||
_, kwargs = mock_post.call_args
|
||||
assert "Authorization" not in kwargs["headers"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _raise_for_validation_failure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRaiseForValidationFailure:
|
||||
@pytest.mark.parametrize(
|
||||
"status, expected_code",
|
||||
[
|
||||
(HookValidateStatus.auth_failed, OnyxErrorCode.CREDENTIAL_INVALID),
|
||||
(HookValidateStatus.timeout, OnyxErrorCode.GATEWAY_TIMEOUT),
|
||||
(HookValidateStatus.cannot_connect, OnyxErrorCode.BAD_GATEWAY),
|
||||
],
|
||||
)
|
||||
def test_raises_correct_error_code(
|
||||
self, status: HookValidateStatus, expected_code: OnyxErrorCode
|
||||
) -> None:
|
||||
validation = HookValidateResponse(status=status, error_message="some error")
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
_raise_for_validation_failure(validation)
|
||||
assert exc_info.value.error_code == expected_code
|
||||
|
||||
def test_auth_failed_passes_error_message_directly(self) -> None:
|
||||
validation = HookValidateResponse(
|
||||
status=HookValidateStatus.auth_failed, error_message="bad credentials"
|
||||
)
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
_raise_for_validation_failure(validation)
|
||||
assert exc_info.value.detail == "bad credentials"
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"status", [HookValidateStatus.timeout, HookValidateStatus.cannot_connect]
|
||||
)
|
||||
def test_timeout_and_cannot_connect_wrap_error_message(
|
||||
self, status: HookValidateStatus
|
||||
) -> None:
|
||||
validation = HookValidateResponse(status=status, error_message="raw error")
|
||||
with pytest.raises(OnyxError) as exc_info:
|
||||
_raise_for_validation_failure(validation)
|
||||
assert exc_info.value.detail == "Endpoint validation failed: raw error"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HookValidateStatus enum string values (API contract)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHookValidateStatusValues:
|
||||
@pytest.mark.parametrize(
|
||||
"status, expected",
|
||||
[
|
||||
(HookValidateStatus.passed, "passed"),
|
||||
(HookValidateStatus.auth_failed, "auth_failed"),
|
||||
(HookValidateStatus.timeout, "timeout"),
|
||||
(HookValidateStatus.cannot_connect, "cannot_connect"),
|
||||
],
|
||||
)
|
||||
def test_string_values(self, status: HookValidateStatus, expected: str) -> None:
|
||||
assert status == expected
|
||||
@@ -1,4 +1,3 @@
|
||||
import "@opal/components/buttons/button/styles.css";
|
||||
import "@opal/components/tooltip.css";
|
||||
import { Interactive, type InteractiveStatelessProps } from "@opal/core";
|
||||
import type { ContainerSizeVariants, ExtremaSizeVariants } from "@opal/types";
|
||||
@@ -67,7 +66,7 @@ function Button({
|
||||
const labelEl = children ? (
|
||||
<span
|
||||
className={cn(
|
||||
"opal-button-label",
|
||||
"whitespace-nowrap",
|
||||
isLarge ? "font-main-ui-body " : "font-secondary-body",
|
||||
responsiveHideText && "hidden md:inline"
|
||||
)}
|
||||
@@ -87,7 +86,7 @@ function Button({
|
||||
isLarge ? "default" : size === "2xs" ? "mini" : "compact"
|
||||
}
|
||||
>
|
||||
<div className={cn("opal-button interactive-foreground")}>
|
||||
<div className="flex flex-row items-center gap-1 interactive-foreground">
|
||||
{iconWrapper(Icon, size, !!children)}
|
||||
|
||||
{labelEl}
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
/* Button — layout only; colors handled by Interactive.Stateless */
|
||||
|
||||
.opal-button {
|
||||
@apply flex flex-row items-center gap-1;
|
||||
}
|
||||
|
||||
.opal-button-label {
|
||||
@apply whitespace-nowrap;
|
||||
}
|
||||
@@ -118,7 +118,7 @@ function OpenButton({
|
||||
const labelEl = children ? (
|
||||
<span
|
||||
className={cn(
|
||||
"opal-button-label whitespace-nowrap",
|
||||
"whitespace-nowrap",
|
||||
isLarge ? "font-main-ui-body" : "font-secondary-body"
|
||||
)}
|
||||
>
|
||||
@@ -143,7 +143,7 @@ function OpenButton({
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
"opal-button interactive-foreground flex flex-row items-center",
|
||||
"interactive-foreground flex flex-row items-center",
|
||||
justifyContent === "between" ? "w-full justify-between" : "gap-1",
|
||||
foldable &&
|
||||
justifyContent !== "between" &&
|
||||
|
||||
@@ -98,7 +98,14 @@ export function IndexAttemptsTable({
|
||||
isReindexInProgress ? "are being" : "were"
|
||||
} synced into the system.`;
|
||||
return (
|
||||
<TableRow key={indexAttempt.id}>
|
||||
<TableRow
|
||||
key={indexAttempt.id}
|
||||
className={
|
||||
indexAttempt.full_exception_trace
|
||||
? "hover:bg-accent-background cursor-pointer relative select-none"
|
||||
: undefined
|
||||
}
|
||||
>
|
||||
<TableCell>
|
||||
{indexAttempt.time_started
|
||||
? localizeAndPrettify(indexAttempt.time_started)
|
||||
@@ -146,46 +153,43 @@ export function IndexAttemptsTable({
|
||||
</div>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<div>
|
||||
{indexAttempt.status === "success" && (
|
||||
{indexAttempt.status === "success" && (
|
||||
<Text className="flex flex-wrap whitespace-normal">
|
||||
{"-"}
|
||||
</Text>
|
||||
)}
|
||||
|
||||
{indexAttempt.status === "failed" &&
|
||||
indexAttempt.error_msg && (
|
||||
<Text className="flex flex-wrap whitespace-normal">
|
||||
{"-"}
|
||||
{indexAttempt.error_msg}
|
||||
</Text>
|
||||
)}
|
||||
|
||||
{indexAttempt.status === "failed" &&
|
||||
indexAttempt.error_msg && (
|
||||
<Text className="flex flex-wrap whitespace-normal">
|
||||
{indexAttempt.error_msg}
|
||||
</Text>
|
||||
)}
|
||||
|
||||
{indexAttempt.full_exception_trace && (
|
||||
<div
|
||||
onClick={() => {
|
||||
setIndexAttemptTracePopupId(indexAttempt.id);
|
||||
}}
|
||||
className="mt-2 text-link cursor-pointer select-none"
|
||||
>
|
||||
View Full Trace
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</TableCell>
|
||||
<td className="w-0 p-0">
|
||||
{indexAttempt.full_exception_trace && (
|
||||
<button
|
||||
type="button"
|
||||
aria-label="View full trace"
|
||||
onClick={() =>
|
||||
setIndexAttemptTracePopupId(indexAttempt.id)
|
||||
}
|
||||
className="absolute w-full h-full left-0 top-0"
|
||||
/>
|
||||
)}
|
||||
</td>
|
||||
</TableRow>
|
||||
);
|
||||
})}
|
||||
</TableBody>
|
||||
</Table>
|
||||
{totalPages > 1 && (
|
||||
<div className="mt-3 flex">
|
||||
<div className="mx-auto">
|
||||
<PageSelector
|
||||
totalPages={totalPages}
|
||||
currentPage={currentPage}
|
||||
onPageChange={onPageChange}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-1 justify-center pt-3">
|
||||
<PageSelector
|
||||
totalPages={totalPages}
|
||||
currentPage={currentPage}
|
||||
onPageChange={onPageChange}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
|
||||
@@ -661,7 +661,7 @@ export default function AgentEditorPage({
|
||||
// Sharing
|
||||
shared_user_ids: existingAgent?.users?.map((user) => user.id) ?? [],
|
||||
shared_group_ids: existingAgent?.groups ?? [],
|
||||
is_public: existingAgent?.is_public ?? true,
|
||||
is_public: existingAgent?.is_public ?? false,
|
||||
label_ids: existingAgent?.labels?.map((l) => l.id) ?? [],
|
||||
featured: existingAgent?.featured ?? false,
|
||||
};
|
||||
|
||||
83
web/src/sections/modals/ShareAgentModal.test.tsx
Normal file
83
web/src/sections/modals/ShareAgentModal.test.tsx
Normal file
@@ -0,0 +1,83 @@
|
||||
import React, { useEffect } from "react";
|
||||
import { render, screen, waitFor } from "@tests/setup/test-utils";
|
||||
import ShareAgentModal, { ShareAgentModalProps } from "./ShareAgentModal";
|
||||
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
|
||||
|
||||
jest.mock("@/hooks/useShareableUsers", () => ({
|
||||
__esModule: true,
|
||||
default: jest.fn(() => ({ data: [] })),
|
||||
}));
|
||||
|
||||
jest.mock("@/hooks/useShareableGroups", () => ({
|
||||
__esModule: true,
|
||||
default: jest.fn(() => ({ data: [] })),
|
||||
}));
|
||||
|
||||
jest.mock("@/hooks/useAgents", () => ({
|
||||
useAgent: jest.fn(() => ({ agent: null })),
|
||||
}));
|
||||
|
||||
jest.mock("@/lib/hooks", () => ({
|
||||
useLabels: jest.fn(() => ({
|
||||
labels: [],
|
||||
createLabel: jest.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
function ModalHarness(props: ShareAgentModalProps) {
|
||||
const modal = useCreateModal();
|
||||
|
||||
useEffect(() => {
|
||||
modal.toggle(true);
|
||||
}, [modal]);
|
||||
|
||||
return (
|
||||
<modal.Provider>
|
||||
<ShareAgentModal {...props} />
|
||||
</modal.Provider>
|
||||
);
|
||||
}
|
||||
|
||||
function renderShareAgentModal(overrides: Partial<ShareAgentModalProps> = {}) {
|
||||
const props: ShareAgentModalProps = {
|
||||
userIds: [],
|
||||
groupIds: [],
|
||||
isPublic: false,
|
||||
isFeatured: false,
|
||||
labelIds: [],
|
||||
...overrides,
|
||||
};
|
||||
|
||||
return render(<ModalHarness {...props} />);
|
||||
}
|
||||
|
||||
describe("ShareAgentModal", () => {
|
||||
it("defaults to Users & Groups when the agent is private", async () => {
|
||||
renderShareAgentModal({ isPublic: false });
|
||||
|
||||
await waitFor(() =>
|
||||
expect(
|
||||
screen.getByRole("tab", { name: "Users & Groups" })
|
||||
).toHaveAttribute("data-state", "active")
|
||||
);
|
||||
|
||||
expect(
|
||||
screen.getByRole("tab", { name: "Your Organization" })
|
||||
).toHaveAttribute("data-state", "inactive");
|
||||
});
|
||||
|
||||
it("defaults to Your Organization when the agent is public", async () => {
|
||||
renderShareAgentModal({ isPublic: true });
|
||||
|
||||
await waitFor(() =>
|
||||
expect(
|
||||
screen.getByRole("tab", { name: "Your Organization" })
|
||||
).toHaveAttribute("data-state", "active")
|
||||
);
|
||||
|
||||
expect(screen.getByRole("tab", { name: "Users & Groups" })).toHaveAttribute(
|
||||
"data-state",
|
||||
"inactive"
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -58,7 +58,7 @@ async function mockCodeInterpreterApi(
|
||||
*/
|
||||
function getDisconnectIconButton(page: Page) {
|
||||
return page
|
||||
.locator("button:has(.opal-button):not(:has(.opal-button-label))")
|
||||
.locator("button:has(.interactive-foreground-icon):not(:has(span))")
|
||||
.first();
|
||||
}
|
||||
|
||||
|
||||
@@ -1860,6 +1860,9 @@ test.describe("MCP OAuth flows", () => {
|
||||
toolName: TOOL_NAMES.admin,
|
||||
logStep,
|
||||
});
|
||||
const createdAgent = await adminApiClient.getAssistant(agentId);
|
||||
expect(createdAgent.is_public).toBe(false);
|
||||
logStep("Verified newly created agent is private by default");
|
||||
const adminToolId = await fetchMcpToolIdByName(
|
||||
page,
|
||||
serverId,
|
||||
@@ -1899,6 +1902,13 @@ test.describe("MCP OAuth flows", () => {
|
||||
).toBeVisible({ timeout: 15000 });
|
||||
logStep("Verified MCP server card is still visible on actions page");
|
||||
|
||||
await adminApiClient.updateAgentSharing(agentId, {
|
||||
isPublic: true,
|
||||
userIds: createdAgent.users.map((user) => user.id),
|
||||
groupIds: createdAgent.groups,
|
||||
});
|
||||
logStep("Published agent explicitly for end-user MCP flow");
|
||||
|
||||
adminArtifacts = {
|
||||
serverId,
|
||||
serverName,
|
||||
|
||||
@@ -709,6 +709,9 @@ export class OnyxApiClient {
|
||||
|
||||
async getAssistant(agentId: number): Promise<{
|
||||
id: number;
|
||||
is_public: boolean;
|
||||
users: Array<{ id: string }>;
|
||||
groups: number[];
|
||||
tools: Array<{ id: number; mcp_server_id?: number | null }>;
|
||||
}> {
|
||||
const response = await this.get(`/persona/${agentId}`);
|
||||
@@ -718,6 +721,37 @@ export class OnyxApiClient {
|
||||
);
|
||||
}
|
||||
|
||||
async updateAgentSharing(
|
||||
agentId: number,
|
||||
options: {
|
||||
userIds?: string[];
|
||||
groupIds?: number[];
|
||||
isPublic?: boolean;
|
||||
labelIds?: number[];
|
||||
}
|
||||
): Promise<void> {
|
||||
const response = await this.request.patch(
|
||||
`${this.baseUrl}/persona/${agentId}/share`,
|
||||
{
|
||||
data: {
|
||||
user_ids: options.userIds,
|
||||
group_ids: options.groupIds,
|
||||
is_public: options.isPublic,
|
||||
label_ids: options.labelIds,
|
||||
},
|
||||
}
|
||||
);
|
||||
await this.handleResponse(
|
||||
response,
|
||||
`Failed to update sharing for assistant ${agentId}`
|
||||
);
|
||||
this.log(
|
||||
`Updated assistant sharing: ${agentId} (is_public=${String(
|
||||
options.isPublic
|
||||
)})`
|
||||
);
|
||||
}
|
||||
|
||||
async listMcpServers(): Promise<any[]> {
|
||||
const response = await this.get(`/admin/mcp/servers`);
|
||||
const data = await this.handleResponse<{ mcp_servers: any[] }>(
|
||||
|
||||
Reference in New Issue
Block a user